blob: 5c92d05f9b8401be9d87de67c1b25c39d952cbb0 [file] [log] [blame]
PREHOOK: query: DROP TABLE part_staging
PREHOOK: type: DROPTABLE
POSTHOOK: query: DROP TABLE part_staging
POSTHOOK: type: DROPTABLE
PREHOOK: query: DROP TABLE part_orc
PREHOOK: type: DROPTABLE
POSTHOOK: query: DROP TABLE part_orc
POSTHOOK: type: DROPTABLE
PREHOOK: query: CREATE TABLE part_staging(
p_partkey INT,
p_name STRING,
p_mfgr STRING,
p_brand STRING,
p_type STRING,
p_size INT,
p_container STRING,
p_retailprice DOUBLE,
p_comment STRING
)
PREHOOK: type: CREATETABLE
PREHOOK: Output: database:default
PREHOOK: Output: default@part_staging
POSTHOOK: query: CREATE TABLE part_staging(
p_partkey INT,
p_name STRING,
p_mfgr STRING,
p_brand STRING,
p_type STRING,
p_size INT,
p_container STRING,
p_retailprice DOUBLE,
p_comment STRING
)
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: database:default
POSTHOOK: Output: default@part_staging
PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/tpch/tiny/part.tbl.bz2' overwrite into table part_staging
PREHOOK: type: LOAD
#### A masked pattern was here ####
PREHOOK: Output: default@part_staging
POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/tpch/tiny/part.tbl.bz2' overwrite into table part_staging
POSTHOOK: type: LOAD
#### A masked pattern was here ####
POSTHOOK: Output: default@part_staging
PREHOOK: query: CREATE TABLE part_orc(
p_partkey INT,
p_name STRING,
p_mfgr STRING,
p_brand STRING,
p_type STRING,
p_size INT,
p_container STRING,
p_retailprice DOUBLE,
p_comment STRING
) STORED AS ORC
PREHOOK: type: CREATETABLE
PREHOOK: Output: database:default
PREHOOK: Output: default@part_orc
POSTHOOK: query: CREATE TABLE part_orc(
p_partkey INT,
p_name STRING,
p_mfgr STRING,
p_brand STRING,
p_type STRING,
p_size INT,
p_container STRING,
p_retailprice DOUBLE,
p_comment STRING
) STORED AS ORC
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: database:default
POSTHOOK: Output: default@part_orc
PREHOOK: query: DESCRIBE EXTENDED part_orc
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@part_orc
POSTHOOK: query: DESCRIBE EXTENDED part_orc
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@part_orc
p_partkey int
p_name string
p_mfgr string
p_brand string
p_type string
p_size int
p_container string
p_retailprice double
p_comment string
#### A masked pattern was here ####
PREHOOK: query: insert into table part_orc select * from part_staging
PREHOOK: type: QUERY
PREHOOK: Input: default@part_staging
PREHOOK: Output: default@part_orc
POSTHOOK: query: insert into table part_orc select * from part_staging
POSTHOOK: type: QUERY
POSTHOOK: Input: default@part_staging
POSTHOOK: Output: default@part_orc
POSTHOOK: Lineage: part_orc.p_brand SIMPLE [(part_staging)part_staging.FieldSchema(name:p_brand, type:string, comment:null), ]
POSTHOOK: Lineage: part_orc.p_comment SIMPLE [(part_staging)part_staging.FieldSchema(name:p_comment, type:string, comment:null), ]
POSTHOOK: Lineage: part_orc.p_container SIMPLE [(part_staging)part_staging.FieldSchema(name:p_container, type:string, comment:null), ]
POSTHOOK: Lineage: part_orc.p_mfgr SIMPLE [(part_staging)part_staging.FieldSchema(name:p_mfgr, type:string, comment:null), ]
POSTHOOK: Lineage: part_orc.p_name SIMPLE [(part_staging)part_staging.FieldSchema(name:p_name, type:string, comment:null), ]
POSTHOOK: Lineage: part_orc.p_partkey SIMPLE [(part_staging)part_staging.FieldSchema(name:p_partkey, type:int, comment:null), ]
POSTHOOK: Lineage: part_orc.p_retailprice SIMPLE [(part_staging)part_staging.FieldSchema(name:p_retailprice, type:double, comment:null), ]
POSTHOOK: Lineage: part_orc.p_size SIMPLE [(part_staging)part_staging.FieldSchema(name:p_size, type:int, comment:null), ]
POSTHOOK: Lineage: part_orc.p_type SIMPLE [(part_staging)part_staging.FieldSchema(name:p_type, type:string, comment:null), ]
PREHOOK: query: explain vectorization detail
select p_mfgr, p_name, p_size,
rank() over (partition by p_mfgr order by p_name) as r,
dense_rank() over (partition by p_mfgr order by p_name) as dr,
round(sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row),2) as s1
from noop(on part_orc
partition by p_mfgr
order by p_name
)
PREHOOK: type: QUERY
PREHOOK: Input: default@part_orc
#### A masked pattern was here ####
POSTHOOK: query: explain vectorization detail
select p_mfgr, p_name, p_size,
rank() over (partition by p_mfgr order by p_name) as r,
dense_rank() over (partition by p_mfgr order by p_name) as dr,
round(sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row),2) as s1
from noop(on part_orc
partition by p_mfgr
order by p_name
)
POSTHOOK: type: QUERY
POSTHOOK: Input: default@part_orc
#### A masked pattern was here ####
PLAN VECTORIZATION:
enabled: true
enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
STAGE PLANS:
Stage: Stage-1
Tez
#### A masked pattern was here ####
Edges:
Reducer 2 <- Map 1 (SIMPLE_EDGE)
Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
TableScan
alias: part_orc
Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE
TableScan Vectorization:
native: true
vectorizationSchemaColumns: [0:p_partkey:int, 1:p_name:string, 2:p_mfgr:string, 3:p_brand:string, 4:p_type:string, 5:p_size:int, 6:p_container:string, 7:p_retailprice:double, 8:p_comment:string, 9:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>, 10:ROW__IS__DELETED:boolean]
Reduce Output Operator
key expressions: p_mfgr (type: string), p_name (type: string)
null sort order: az
sort order: ++
Map-reduce partition columns: p_mfgr (type: string)
Reduce Sink Vectorization:
className: VectorReduceSinkObjectHashOperator
keyColumns: 2:string, 1:string
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
partitionColumns: 2:string
valueColumns: 5:int, 7:double
Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: p_size (type: int), p_retailprice (type: double)
Execution mode: vectorized, llap
LLAP IO: all inputs
Map Vectorization:
enabled: true
enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
inputFormatFeatureSupport: [DECIMAL_64]
featureSupportInUse: [DECIMAL_64]
inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
allNative: true
usesVectorUDFAdaptor: false
vectorized: true
rowBatchContext:
dataColumnCount: 9
includeColumns: [1, 2, 5, 7]
dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string
partitionColumnCount: 0
scratchColumnTypeNames: []
Reducer 2
Execution mode: llap
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez] IS true
notVectorizedReason: PTF operator: NOOP not supported
vectorized: false
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double)
outputColumnNames: _col1, _col2, _col5, _col7
Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE
PTF Operator
Function definitions:
Input definition
input alias: part_orc
output shape: _col1: string, _col2: string, _col5: int, _col7: double
type: TABLE
Partition table definition
input alias: ptf_1
name: noop
order by: _col1 ASC NULLS LAST
output shape: _col1: string, _col2: string, _col5: int, _col7: double
partition by: _col2
raw input shape:
Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col2 (type: string), _col1 (type: string)
null sort order: az
sort order: ++
Map-reduce partition columns: _col2 (type: string)
Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col5 (type: int), _col7 (type: double)
Reducer 3
Execution mode: vectorized, llap
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez] IS true
reduceColumnNullOrder: az
reduceColumnSortOrder: ++
allNative: false
usesVectorUDFAdaptor: false
vectorized: true
rowBatchContext:
dataColumnCount: 4
dataColumns: KEY.reducesinkkey0:string, KEY.reducesinkkey1:string, VALUE._col3:int, VALUE._col5:double
partitionColumnCount: 0
scratchColumnTypeNames: [bigint, bigint, double, double]
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double)
outputColumnNames: _col1, _col2, _col5, _col7
Select Vectorization:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [1, 0, 2, 3]
Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE
PTF Operator
Function definitions:
Input definition
input alias: ptf_0
output shape: _col1: string, _col2: string, _col5: int, _col7: double
type: WINDOWING
Windowing table definition
input alias: ptf_1
name: windowingtablefunction
order by: _col1 ASC NULLS LAST
partition by: _col2
raw input shape:
window functions:
window function definition
alias: rank_window_0
arguments: _col1
name: rank
window function: GenericUDAFRankEvaluator
window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
alias: dense_rank_window_1
arguments: _col1
name: dense_rank
window function: GenericUDAFDenseRankEvaluator
window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
alias: sum_window_2
arguments: _col7
name: sum
window function: GenericUDAFSumDouble
window frame: ROWS PRECEDING(MAX)~CURRENT
PTF Vectorization:
allEvaluatorsAreStreaming: true
className: VectorPTFOperator
evaluatorClasses: [VectorPTFEvaluatorRank, VectorPTFEvaluatorDenseRank, VectorPTFEvaluatorStreamingDoubleSum]
functionInputExpressions: [col 1:string, col 1:string, col 3:double]
functionNames: [rank, dense_rank, sum]
keyInputColumns: [1, 0]
native: true
nonKeyInputColumns: [2, 3]
orderExpressions: [col 1:string]
outputColumns: [4, 5, 6, 1, 0, 2, 3]
outputTypes: [int, int, double, string, string, int, double]
partitionExpressions: [col 0:string]
streamingColumns: [4, 5, 6]
Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), round(sum_window_2, 2) (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
Select Vectorization:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [0, 1, 2, 4, 5, 7]
selectExpressions: RoundWithNumDigitsDoubleToDouble(col 6, decimalPlaces 2) -> 7:double
Statistics: Num rows: 26 Data size: 6214 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
Statistics: Num rows: 26 Data size: 6214 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
Fetch Operator
limit: -1
Processor Tree:
ListSink
PREHOOK: query: select p_mfgr, p_name, p_size,
rank() over (partition by p_mfgr order by p_name) as r,
dense_rank() over (partition by p_mfgr order by p_name) as dr,
sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1
from noop(on part_orc
partition by p_mfgr
order by p_name
)
PREHOOK: type: QUERY
PREHOOK: Input: default@part_orc
#### A masked pattern was here ####
POSTHOOK: query: select p_mfgr, p_name, p_size,
rank() over (partition by p_mfgr order by p_name) as r,
dense_rank() over (partition by p_mfgr order by p_name) as dr,
sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1
from noop(on part_orc
partition by p_mfgr
order by p_name
)
POSTHOOK: type: QUERY
POSTHOOK: Input: default@part_orc
#### A masked pattern was here ####
Manufacturer#1 almond antique burnished rose metallic 2 1 1 1173.15
Manufacturer#1 almond antique burnished rose metallic 2 1 1 2346.3
Manufacturer#1 almond antique chartreuse lavender yellow 34 3 2 4100.06
Manufacturer#1 almond antique salmon chartreuse burlywood 6 4 3 5702.650000000001
Manufacturer#1 almond aquamarine burnished black steel 28 5 4 7117.070000000001
Manufacturer#1 almond aquamarine pink moccasin thistle 42 6 5 8749.730000000001
Manufacturer#2 almond antique violet chocolate turquoise 14 1 1 1690.68
Manufacturer#2 almond antique violet turquoise frosted 40 2 2 3491.38
Manufacturer#2 almond aquamarine midnight light salmon 2 3 3 5523.360000000001
Manufacturer#2 almond aquamarine rose maroon antique 25 4 4 7222.02
Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 5 5 8923.62
Manufacturer#3 almond antique chartreuse khaki white 17 1 1 1671.68
Manufacturer#3 almond antique forest lavender goldenrod 14 2 2 2861.95
Manufacturer#3 almond antique metallic orange dim 19 3 3 4272.34
Manufacturer#3 almond antique misty red olive 1 4 4 6195.32
Manufacturer#3 almond antique olive coral navajo 45 5 5 7532.61
Manufacturer#4 almond antique gainsboro frosted violet 10 1 1 1620.67
Manufacturer#4 almond antique violet mint lemon 39 2 2 2996.09
Manufacturer#4 almond aquamarine floral ivory bisque 27 3 3 4202.35
Manufacturer#4 almond aquamarine yellow dodger mint 7 4 4 6047.27
Manufacturer#4 almond azure aquamarine papaya violet 12 5 5 7337.620000000001
Manufacturer#5 almond antique blue firebrick mint 31 1 1 1789.69
Manufacturer#5 almond antique medium spring khaki 6 2 2 3401.3500000000004
Manufacturer#5 almond antique sky peru orange 2 3 3 5190.08
Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 6208.18
Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 7672.66
PREHOOK: query: explain vectorization detail
select p_mfgr, p_name,
p_size, p_size - lag(p_size,1,p_size) over (partition by p_mfgr order by p_name) as deltaSz
from noop (on (select p1.* from part_orc p1 join part_orc p2 on p1.p_partkey = p2.p_partkey) j
distribute by j.p_mfgr
sort by j.p_name)
PREHOOK: type: QUERY
PREHOOK: Input: default@part_orc
#### A masked pattern was here ####
POSTHOOK: query: explain vectorization detail
select p_mfgr, p_name,
p_size, p_size - lag(p_size,1,p_size) over (partition by p_mfgr order by p_name) as deltaSz
from noop (on (select p1.* from part_orc p1 join part_orc p2 on p1.p_partkey = p2.p_partkey) j
distribute by j.p_mfgr
sort by j.p_name)
POSTHOOK: type: QUERY
POSTHOOK: Input: default@part_orc
#### A masked pattern was here ####
PLAN VECTORIZATION:
enabled: true
enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
STAGE PLANS:
Stage: Stage-1
Tez
#### A masked pattern was here ####
Edges:
Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE)
Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
Reducer 4 <- Reducer 3 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
TableScan
alias: p1
filterExpr: p_partkey is not null (type: boolean)
Statistics: Num rows: 26 Data size: 5902 Basic stats: COMPLETE Column stats: COMPLETE
TableScan Vectorization:
native: true
vectorizationSchemaColumns: [0:p_partkey:int, 1:p_name:string, 2:p_mfgr:string, 3:p_brand:string, 4:p_type:string, 5:p_size:int, 6:p_container:string, 7:p_retailprice:double, 8:p_comment:string, 9:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>, 10:ROW__IS__DELETED:boolean]
Filter Operator
Filter Vectorization:
className: VectorFilterOperator
native: true
predicateExpression: SelectColumnIsNotNull(col 0:int)
predicate: p_partkey is not null (type: boolean)
Statistics: Num rows: 26 Data size: 5902 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: p_partkey (type: int)
null sort order: z
sort order: +
Map-reduce partition columns: p_partkey (type: int)
Reduce Sink Vectorization:
className: VectorReduceSinkLongOperator
keyColumns: 0:int
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
valueColumns: 1:string, 2:string, 5:int
Statistics: Num rows: 26 Data size: 5902 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int)
Execution mode: vectorized, llap
LLAP IO: all inputs
Map Vectorization:
enabled: true
enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
inputFormatFeatureSupport: [DECIMAL_64]
featureSupportInUse: [DECIMAL_64]
inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
allNative: true
usesVectorUDFAdaptor: false
vectorized: true
rowBatchContext:
dataColumnCount: 9
includeColumns: [0, 1, 2, 5]
dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string
partitionColumnCount: 0
scratchColumnTypeNames: []
Map 5
Map Operator Tree:
TableScan
alias: p2
filterExpr: p_partkey is not null (type: boolean)
Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE
TableScan Vectorization:
native: true
vectorizationSchemaColumns: [0:p_partkey:int, 1:p_name:string, 2:p_mfgr:string, 3:p_brand:string, 4:p_type:string, 5:p_size:int, 6:p_container:string, 7:p_retailprice:double, 8:p_comment:string, 9:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>, 10:ROW__IS__DELETED:boolean]
Filter Operator
Filter Vectorization:
className: VectorFilterOperator
native: true
predicateExpression: SelectColumnIsNotNull(col 0:int)
predicate: p_partkey is not null (type: boolean)
Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: p_partkey (type: int)
null sort order: z
sort order: +
Map-reduce partition columns: p_partkey (type: int)
Reduce Sink Vectorization:
className: VectorReduceSinkLongOperator
keyColumns: 0:int
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE
Execution mode: vectorized, llap
LLAP IO: all inputs
Map Vectorization:
enabled: true
enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
inputFormatFeatureSupport: [DECIMAL_64]
featureSupportInUse: [DECIMAL_64]
inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
allNative: true
usesVectorUDFAdaptor: false
vectorized: true
rowBatchContext:
dataColumnCount: 9
includeColumns: [0]
dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string
partitionColumnCount: 0
scratchColumnTypeNames: []
Reducer 2
Execution mode: llap
Reduce Operator Tree:
Merge Join Operator
condition map:
Inner Join 0 to 1
keys:
0 p_partkey (type: int)
1 p_partkey (type: int)
outputColumnNames: _col1, _col2, _col5
Statistics: Num rows: 27 Data size: 6021 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col2 (type: string), _col1 (type: string)
null sort order: az
sort order: ++
Map-reduce partition columns: _col2 (type: string)
Statistics: Num rows: 27 Data size: 6021 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col5 (type: int)
MergeJoin Vectorization:
enabled: false
enableConditionsNotMet: Vectorizing MergeJoin Supported IS false
Reducer 3
Execution mode: llap
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez] IS true
notVectorizedReason: PTF operator: NOOP not supported
vectorized: false
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int)
outputColumnNames: _col1, _col2, _col5
Statistics: Num rows: 27 Data size: 6021 Basic stats: COMPLETE Column stats: COMPLETE
PTF Operator
Function definitions:
Input definition
input alias: j
output shape: _col1: string, _col2: string, _col5: int
type: SUBQUERY
Partition table definition
input alias: ptf_1
name: noop
order by: _col1 ASC NULLS LAST
output shape: _col1: string, _col2: string, _col5: int
partition by: _col2
raw input shape:
Statistics: Num rows: 27 Data size: 6021 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col2 (type: string), _col1 (type: string)
null sort order: az
sort order: ++
Map-reduce partition columns: _col2 (type: string)
Statistics: Num rows: 27 Data size: 6021 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col5 (type: int)
Reducer 4
Execution mode: vectorized, llap
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez] IS true
reduceColumnNullOrder: az
reduceColumnSortOrder: ++
allNative: false
usesVectorUDFAdaptor: false
vectorized: true
rowBatchContext:
dataColumnCount: 3
dataColumns: KEY.reducesinkkey0:string, KEY.reducesinkkey1:string, VALUE._col3:int
partitionColumnCount: 0
scratchColumnTypeNames: [bigint, bigint, bigint]
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int)
outputColumnNames: _col1, _col2, _col5
Select Vectorization:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [1, 0, 2]
Statistics: Num rows: 27 Data size: 6021 Basic stats: COMPLETE Column stats: COMPLETE
PTF Operator
Function definitions:
Input definition
input alias: ptf_0
output shape: _col1: string, _col2: string, _col5: int
type: WINDOWING
Windowing table definition
input alias: ptf_1
name: windowingtablefunction
order by: _col1 ASC NULLS LAST
partition by: _col2
raw input shape:
window functions:
window function definition
alias: lag_window_0
arguments: _col5, 1, _col5
name: lag
window function: GenericUDAFLagEvaluator
window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
PTF Vectorization:
allEvaluatorsAreStreaming: false
className: VectorPTFOperator
evaluatorClasses: [VectorPTFEvaluatorLag]
functionInputExpressions: [col 2:int]
functionNames: [lag]
keyInputColumns: [1, 0]
native: true
nonKeyInputColumns: [2]
orderExpressions: [col 1:string]
outputColumns: [3, 1, 0, 2]
outputTypes: [int, string, string, int]
partitionExpressions: [col 0:string]
streamingColumns: []
Statistics: Num rows: 27 Data size: 6021 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), (_col5 - lag_window_0) (type: int)
outputColumnNames: _col0, _col1, _col2, _col3
Select Vectorization:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [0, 1, 2, 5]
selectExpressions: LongColSubtractLongColumn(col 2:int, col 3:int) -> 5:int
Statistics: Num rows: 27 Data size: 6129 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
Statistics: Num rows: 27 Data size: 6129 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
Fetch Operator
limit: -1
Processor Tree:
ListSink
PREHOOK: query: select p_mfgr, p_name,
p_size, p_size - lag(p_size,1,p_size) over (partition by p_mfgr order by p_name) as deltaSz
from noop (on (select p1.* from part_orc p1 join part_orc p2 on p1.p_partkey = p2.p_partkey) j
distribute by j.p_mfgr
sort by j.p_name)
PREHOOK: type: QUERY
PREHOOK: Input: default@part_orc
#### A masked pattern was here ####
POSTHOOK: query: select p_mfgr, p_name,
p_size, p_size - lag(p_size,1,p_size) over (partition by p_mfgr order by p_name) as deltaSz
from noop (on (select p1.* from part_orc p1 join part_orc p2 on p1.p_partkey = p2.p_partkey) j
distribute by j.p_mfgr
sort by j.p_name)
POSTHOOK: type: QUERY
POSTHOOK: Input: default@part_orc
#### A masked pattern was here ####
Manufacturer#1 almond antique burnished rose metallic 2 0
Manufacturer#1 almond antique burnished rose metallic 2 0
Manufacturer#1 almond antique burnished rose metallic 2 0
Manufacturer#1 almond antique burnished rose metallic 2 0
Manufacturer#1 almond antique chartreuse lavender yellow 34 32
Manufacturer#1 almond antique salmon chartreuse burlywood 6 -28
Manufacturer#1 almond aquamarine burnished black steel 28 22
Manufacturer#1 almond aquamarine pink moccasin thistle 42 14
Manufacturer#2 almond antique violet chocolate turquoise 14 0
Manufacturer#2 almond antique violet turquoise frosted 40 26
Manufacturer#2 almond aquamarine midnight light salmon 2 -38
Manufacturer#2 almond aquamarine rose maroon antique 25 23
Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 -7
Manufacturer#3 almond antique chartreuse khaki white 17 0
Manufacturer#3 almond antique forest lavender goldenrod 14 -3
Manufacturer#3 almond antique metallic orange dim 19 5
Manufacturer#3 almond antique misty red olive 1 -18
Manufacturer#3 almond antique olive coral navajo 45 44
Manufacturer#4 almond antique gainsboro frosted violet 10 0
Manufacturer#4 almond antique violet mint lemon 39 29
Manufacturer#4 almond aquamarine floral ivory bisque 27 -12
Manufacturer#4 almond aquamarine yellow dodger mint 7 -20
Manufacturer#4 almond azure aquamarine papaya violet 12 5
Manufacturer#5 almond antique blue firebrick mint 31 0
Manufacturer#5 almond antique medium spring khaki 6 -25
Manufacturer#5 almond antique sky peru orange 2 -4
Manufacturer#5 almond aquamarine dodger light gainsboro 46 44
Manufacturer#5 almond azure blanched chiffon midnight 23 -23
PREHOOK: query: explain vectorization detail
select p_mfgr, p_name, p_size
from noop(on part_orc
partition by p_mfgr
order by p_name)
PREHOOK: type: QUERY
PREHOOK: Input: default@part_orc
#### A masked pattern was here ####
POSTHOOK: query: explain vectorization detail
select p_mfgr, p_name, p_size
from noop(on part_orc
partition by p_mfgr
order by p_name)
POSTHOOK: type: QUERY
POSTHOOK: Input: default@part_orc
#### A masked pattern was here ####
PLAN VECTORIZATION:
enabled: true
enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
STAGE PLANS:
Stage: Stage-1
Tez
#### A masked pattern was here ####
Edges:
Reducer 2 <- Map 1 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
TableScan
alias: part_orc
Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
TableScan Vectorization:
native: true
vectorizationSchemaColumns: [0:p_partkey:int, 1:p_name:string, 2:p_mfgr:string, 3:p_brand:string, 4:p_type:string, 5:p_size:int, 6:p_container:string, 7:p_retailprice:double, 8:p_comment:string, 9:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>, 10:ROW__IS__DELETED:boolean]
Reduce Output Operator
key expressions: p_mfgr (type: string), p_name (type: string)
null sort order: az
sort order: ++
Map-reduce partition columns: p_mfgr (type: string)
Reduce Sink Vectorization:
className: VectorReduceSinkObjectHashOperator
keyColumns: 2:string, 1:string
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
partitionColumns: 2:string
valueColumns: 5:int
Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: p_size (type: int)
Execution mode: vectorized, llap
LLAP IO: all inputs
Map Vectorization:
enabled: true
enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
inputFormatFeatureSupport: [DECIMAL_64]
featureSupportInUse: [DECIMAL_64]
inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
allNative: true
usesVectorUDFAdaptor: false
vectorized: true
rowBatchContext:
dataColumnCount: 9
includeColumns: [1, 2, 5]
dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string
partitionColumnCount: 0
scratchColumnTypeNames: []
Reducer 2
Execution mode: llap
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez] IS true
notVectorizedReason: PTF operator: NOOP not supported
vectorized: false
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int)
outputColumnNames: _col1, _col2, _col5
Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
PTF Operator
Function definitions:
Input definition
input alias: part_orc
output shape: _col1: string, _col2: string, _col5: int
type: TABLE
Partition table definition
input alias: ptf_1
name: noop
order by: _col1 ASC NULLS LAST
output shape: _col1: string, _col2: string, _col5: int
partition by: _col2
raw input shape:
Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int)
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
Fetch Operator
limit: -1
Processor Tree:
ListSink
PREHOOK: query: select p_mfgr, p_name, p_size
from noop(on part_orc
partition by p_mfgr
order by p_name)
PREHOOK: type: QUERY
PREHOOK: Input: default@part_orc
#### A masked pattern was here ####
POSTHOOK: query: select p_mfgr, p_name, p_size
from noop(on part_orc
partition by p_mfgr
order by p_name)
POSTHOOK: type: QUERY
POSTHOOK: Input: default@part_orc
#### A masked pattern was here ####
Manufacturer#1 almond antique burnished rose metallic 2
Manufacturer#1 almond antique burnished rose metallic 2
Manufacturer#1 almond antique chartreuse lavender yellow 34
Manufacturer#1 almond antique salmon chartreuse burlywood 6
Manufacturer#1 almond aquamarine burnished black steel 28
Manufacturer#1 almond aquamarine pink moccasin thistle 42
Manufacturer#2 almond antique violet chocolate turquoise 14
Manufacturer#2 almond antique violet turquoise frosted 40
Manufacturer#2 almond aquamarine midnight light salmon 2
Manufacturer#2 almond aquamarine rose maroon antique 25
Manufacturer#2 almond aquamarine sandy cyan gainsboro 18
Manufacturer#3 almond antique chartreuse khaki white 17
Manufacturer#3 almond antique forest lavender goldenrod 14
Manufacturer#3 almond antique metallic orange dim 19
Manufacturer#3 almond antique misty red olive 1
Manufacturer#3 almond antique olive coral navajo 45
Manufacturer#4 almond antique gainsboro frosted violet 10
Manufacturer#4 almond antique violet mint lemon 39
Manufacturer#4 almond aquamarine floral ivory bisque 27
Manufacturer#4 almond aquamarine yellow dodger mint 7
Manufacturer#4 almond azure aquamarine papaya violet 12
Manufacturer#5 almond antique blue firebrick mint 31
Manufacturer#5 almond antique medium spring khaki 6
Manufacturer#5 almond antique sky peru orange 2
Manufacturer#5 almond aquamarine dodger light gainsboro 46
Manufacturer#5 almond azure blanched chiffon midnight 23
PREHOOK: query: explain vectorization detail
select p_mfgr, p_name, p_size,
rank() over (partition by p_mfgr order by p_name) as r,
dense_rank() over (partition by p_mfgr order by p_name) as dr,
round(sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row),2) as s1
from noop(on part_orc
partition by p_mfgr
order by p_name
) abc
PREHOOK: type: QUERY
PREHOOK: Input: default@part_orc
#### A masked pattern was here ####
POSTHOOK: query: explain vectorization detail
select p_mfgr, p_name, p_size,
rank() over (partition by p_mfgr order by p_name) as r,
dense_rank() over (partition by p_mfgr order by p_name) as dr,
round(sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row),2) as s1
from noop(on part_orc
partition by p_mfgr
order by p_name
) abc
POSTHOOK: type: QUERY
POSTHOOK: Input: default@part_orc
#### A masked pattern was here ####
PLAN VECTORIZATION:
enabled: true
enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
STAGE PLANS:
Stage: Stage-1
Tez
#### A masked pattern was here ####
Edges:
Reducer 2 <- Map 1 (SIMPLE_EDGE)
Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
TableScan
alias: part_orc
Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE
TableScan Vectorization:
native: true
vectorizationSchemaColumns: [0:p_partkey:int, 1:p_name:string, 2:p_mfgr:string, 3:p_brand:string, 4:p_type:string, 5:p_size:int, 6:p_container:string, 7:p_retailprice:double, 8:p_comment:string, 9:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>, 10:ROW__IS__DELETED:boolean]
Reduce Output Operator
key expressions: p_mfgr (type: string), p_name (type: string)
null sort order: az
sort order: ++
Map-reduce partition columns: p_mfgr (type: string)
Reduce Sink Vectorization:
className: VectorReduceSinkObjectHashOperator
keyColumns: 2:string, 1:string
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
partitionColumns: 2:string
valueColumns: 5:int, 7:double
Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: p_size (type: int), p_retailprice (type: double)
Execution mode: vectorized, llap
LLAP IO: all inputs
Map Vectorization:
enabled: true
enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
inputFormatFeatureSupport: [DECIMAL_64]
featureSupportInUse: [DECIMAL_64]
inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
allNative: true
usesVectorUDFAdaptor: false
vectorized: true
rowBatchContext:
dataColumnCount: 9
includeColumns: [1, 2, 5, 7]
dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string
partitionColumnCount: 0
scratchColumnTypeNames: []
Reducer 2
Execution mode: llap
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez] IS true
notVectorizedReason: PTF operator: NOOP not supported
vectorized: false
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double)
outputColumnNames: _col1, _col2, _col5, _col7
Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE
PTF Operator
Function definitions:
Input definition
input alias: part_orc
output shape: _col1: string, _col2: string, _col5: int, _col7: double
type: TABLE
Partition table definition
input alias: abc
name: noop
order by: _col1 ASC NULLS LAST
output shape: _col1: string, _col2: string, _col5: int, _col7: double
partition by: _col2
raw input shape:
Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col2 (type: string), _col1 (type: string)
null sort order: az
sort order: ++
Map-reduce partition columns: _col2 (type: string)
Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col5 (type: int), _col7 (type: double)
Reducer 3
Execution mode: vectorized, llap
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez] IS true
reduceColumnNullOrder: az
reduceColumnSortOrder: ++
allNative: false
usesVectorUDFAdaptor: false
vectorized: true
rowBatchContext:
dataColumnCount: 4
dataColumns: KEY.reducesinkkey0:string, KEY.reducesinkkey1:string, VALUE._col3:int, VALUE._col5:double
partitionColumnCount: 0
scratchColumnTypeNames: [bigint, bigint, double, double]
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double)
outputColumnNames: _col1, _col2, _col5, _col7
Select Vectorization:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [1, 0, 2, 3]
Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE
PTF Operator
Function definitions:
Input definition
input alias: ptf_0
output shape: _col1: string, _col2: string, _col5: int, _col7: double
type: WINDOWING
Windowing table definition
input alias: ptf_1
name: windowingtablefunction
order by: _col1 ASC NULLS LAST
partition by: _col2
raw input shape:
window functions:
window function definition
alias: rank_window_0
arguments: _col1
name: rank
window function: GenericUDAFRankEvaluator
window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
alias: dense_rank_window_1
arguments: _col1
name: dense_rank
window function: GenericUDAFDenseRankEvaluator
window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
alias: sum_window_2
arguments: _col7
name: sum
window function: GenericUDAFSumDouble
window frame: ROWS PRECEDING(MAX)~CURRENT
PTF Vectorization:
allEvaluatorsAreStreaming: true
className: VectorPTFOperator
evaluatorClasses: [VectorPTFEvaluatorRank, VectorPTFEvaluatorDenseRank, VectorPTFEvaluatorStreamingDoubleSum]
functionInputExpressions: [col 1:string, col 1:string, col 3:double]
functionNames: [rank, dense_rank, sum]
keyInputColumns: [1, 0]
native: true
nonKeyInputColumns: [2, 3]
orderExpressions: [col 1:string]
outputColumns: [4, 5, 6, 1, 0, 2, 3]
outputTypes: [int, int, double, string, string, int, double]
partitionExpressions: [col 0:string]
streamingColumns: [4, 5, 6]
Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), round(sum_window_2, 2) (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
Select Vectorization:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [0, 1, 2, 4, 5, 7]
selectExpressions: RoundWithNumDigitsDoubleToDouble(col 6, decimalPlaces 2) -> 7:double
Statistics: Num rows: 26 Data size: 6214 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
Statistics: Num rows: 26 Data size: 6214 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
Fetch Operator
limit: -1
Processor Tree:
ListSink
PREHOOK: query: select p_mfgr, p_name, p_size,
rank() over (partition by p_mfgr order by p_name) as r,
dense_rank() over (partition by p_mfgr order by p_name) as dr,
round(sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row),2) as s1
from noop(on part_orc
partition by p_mfgr
order by p_name
) abc
PREHOOK: type: QUERY
PREHOOK: Input: default@part_orc
#### A masked pattern was here ####
POSTHOOK: query: select p_mfgr, p_name, p_size,
rank() over (partition by p_mfgr order by p_name) as r,
dense_rank() over (partition by p_mfgr order by p_name) as dr,
round(sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row),2) as s1
from noop(on part_orc
partition by p_mfgr
order by p_name
) abc
POSTHOOK: type: QUERY
POSTHOOK: Input: default@part_orc
#### A masked pattern was here ####
Manufacturer#1 almond antique burnished rose metallic 2 1 1 1173.15
Manufacturer#1 almond antique burnished rose metallic 2 1 1 2346.3
Manufacturer#1 almond antique chartreuse lavender yellow 34 3 2 4100.06
Manufacturer#1 almond antique salmon chartreuse burlywood 6 4 3 5702.65
Manufacturer#1 almond aquamarine burnished black steel 28 5 4 7117.07
Manufacturer#1 almond aquamarine pink moccasin thistle 42 6 5 8749.73
Manufacturer#2 almond antique violet chocolate turquoise 14 1 1 1690.68
Manufacturer#2 almond antique violet turquoise frosted 40 2 2 3491.38
Manufacturer#2 almond aquamarine midnight light salmon 2 3 3 5523.36
Manufacturer#2 almond aquamarine rose maroon antique 25 4 4 7222.02
Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 5 5 8923.62
Manufacturer#3 almond antique chartreuse khaki white 17 1 1 1671.68
Manufacturer#3 almond antique forest lavender goldenrod 14 2 2 2861.95
Manufacturer#3 almond antique metallic orange dim 19 3 3 4272.34
Manufacturer#3 almond antique misty red olive 1 4 4 6195.32
Manufacturer#3 almond antique olive coral navajo 45 5 5 7532.61
Manufacturer#4 almond antique gainsboro frosted violet 10 1 1 1620.67
Manufacturer#4 almond antique violet mint lemon 39 2 2 2996.09
Manufacturer#4 almond aquamarine floral ivory bisque 27 3 3 4202.35
Manufacturer#4 almond aquamarine yellow dodger mint 7 4 4 6047.27
Manufacturer#4 almond azure aquamarine papaya violet 12 5 5 7337.62
Manufacturer#5 almond antique blue firebrick mint 31 1 1 1789.69
Manufacturer#5 almond antique medium spring khaki 6 2 2 3401.35
Manufacturer#5 almond antique sky peru orange 2 3 3 5190.08
Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 6208.18
Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 7672.66
PREHOOK: query: explain vectorization detail
select p_mfgr, p_name, p_size,
rank() over (partition by p_mfgr order by p_name) as r,
dense_rank() over (partition by p_mfgr order by p_name) as dr,
p_size, p_size - lag(p_size,1,p_size) over (partition by p_mfgr order by p_name) as deltaSz
from noop(on part_orc
partition by p_mfgr
order by p_name
)
PREHOOK: type: QUERY
PREHOOK: Input: default@part_orc
#### A masked pattern was here ####
POSTHOOK: query: explain vectorization detail
select p_mfgr, p_name, p_size,
rank() over (partition by p_mfgr order by p_name) as r,
dense_rank() over (partition by p_mfgr order by p_name) as dr,
p_size, p_size - lag(p_size,1,p_size) over (partition by p_mfgr order by p_name) as deltaSz
from noop(on part_orc
partition by p_mfgr
order by p_name
)
POSTHOOK: type: QUERY
POSTHOOK: Input: default@part_orc
#### A masked pattern was here ####
PLAN VECTORIZATION:
enabled: true
enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
STAGE PLANS:
Stage: Stage-1
Tez
#### A masked pattern was here ####
Edges:
Reducer 2 <- Map 1 (SIMPLE_EDGE)
Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
TableScan
alias: part_orc
Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
TableScan Vectorization:
native: true
vectorizationSchemaColumns: [0:p_partkey:int, 1:p_name:string, 2:p_mfgr:string, 3:p_brand:string, 4:p_type:string, 5:p_size:int, 6:p_container:string, 7:p_retailprice:double, 8:p_comment:string, 9:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>, 10:ROW__IS__DELETED:boolean]
Reduce Output Operator
key expressions: p_mfgr (type: string), p_name (type: string)
null sort order: az
sort order: ++
Map-reduce partition columns: p_mfgr (type: string)
Reduce Sink Vectorization:
className: VectorReduceSinkObjectHashOperator
keyColumns: 2:string, 1:string
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
partitionColumns: 2:string
valueColumns: 5:int
Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: p_size (type: int)
Execution mode: vectorized, llap
LLAP IO: all inputs
Map Vectorization:
enabled: true
enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
inputFormatFeatureSupport: [DECIMAL_64]
featureSupportInUse: [DECIMAL_64]
inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
allNative: true
usesVectorUDFAdaptor: false
vectorized: true
rowBatchContext:
dataColumnCount: 9
includeColumns: [1, 2, 5]
dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string
partitionColumnCount: 0
scratchColumnTypeNames: []
Reducer 2
Execution mode: llap
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez] IS true
notVectorizedReason: PTF operator: NOOP not supported
vectorized: false
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int)
outputColumnNames: _col1, _col2, _col5
Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
PTF Operator
Function definitions:
Input definition
input alias: part_orc
output shape: _col1: string, _col2: string, _col5: int
type: TABLE
Partition table definition
input alias: ptf_1
name: noop
order by: _col1 ASC NULLS LAST
output shape: _col1: string, _col2: string, _col5: int
partition by: _col2
raw input shape:
Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col2 (type: string), _col1 (type: string)
null sort order: az
sort order: ++
Map-reduce partition columns: _col2 (type: string)
Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col5 (type: int)
Reducer 3
Execution mode: vectorized, llap
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez] IS true
reduceColumnNullOrder: az
reduceColumnSortOrder: ++
allNative: false
usesVectorUDFAdaptor: false
vectorized: true
rowBatchContext:
dataColumnCount: 3
dataColumns: KEY.reducesinkkey0:string, KEY.reducesinkkey1:string, VALUE._col3:int
partitionColumnCount: 0
scratchColumnTypeNames: [bigint, bigint, bigint, bigint, bigint]
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int)
outputColumnNames: _col1, _col2, _col5
Select Vectorization:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [1, 0, 2]
Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
PTF Operator
Function definitions:
Input definition
input alias: ptf_0
output shape: _col1: string, _col2: string, _col5: int
type: WINDOWING
Windowing table definition
input alias: ptf_1
name: windowingtablefunction
order by: _col1 ASC NULLS LAST
partition by: _col2
raw input shape:
window functions:
window function definition
alias: rank_window_0
arguments: _col1
name: rank
window function: GenericUDAFRankEvaluator
window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
alias: dense_rank_window_1
arguments: _col1
name: dense_rank
window function: GenericUDAFDenseRankEvaluator
window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
alias: lag_window_2
arguments: _col5, 1, _col5
name: lag
window function: GenericUDAFLagEvaluator
window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
PTF Vectorization:
allEvaluatorsAreStreaming: false
className: VectorPTFOperator
evaluatorClasses: [VectorPTFEvaluatorRank, VectorPTFEvaluatorDenseRank, VectorPTFEvaluatorLag]
functionInputExpressions: [col 1:string, col 1:string, col 2:int]
functionNames: [rank, dense_rank, lag]
keyInputColumns: [1, 0]
native: true
nonKeyInputColumns: [2]
orderExpressions: [col 1:string]
outputColumns: [3, 4, 5, 1, 0, 2]
outputTypes: [int, int, int, string, string, int]
partitionExpressions: [col 0:string]
streamingColumns: [3, 4]
Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), (_col5 - lag_window_2) (type: int)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
Select Vectorization:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [0, 1, 2, 3, 4, 2, 7]
selectExpressions: LongColSubtractLongColumn(col 2:int, col 5:int) -> 7:int
Statistics: Num rows: 26 Data size: 6214 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
Statistics: Num rows: 26 Data size: 6214 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
Fetch Operator
limit: -1
Processor Tree:
ListSink
PREHOOK: query: select p_mfgr, p_name, p_size,
rank() over (partition by p_mfgr order by p_name) as r,
dense_rank() over (partition by p_mfgr order by p_name) as dr,
p_size, p_size - lag(p_size,1,p_size) over (partition by p_mfgr order by p_name) as deltaSz
from noop(on part_orc
partition by p_mfgr
order by p_name
)
PREHOOK: type: QUERY
PREHOOK: Input: default@part_orc
#### A masked pattern was here ####
POSTHOOK: query: select p_mfgr, p_name, p_size,
rank() over (partition by p_mfgr order by p_name) as r,
dense_rank() over (partition by p_mfgr order by p_name) as dr,
p_size, p_size - lag(p_size,1,p_size) over (partition by p_mfgr order by p_name) as deltaSz
from noop(on part_orc
partition by p_mfgr
order by p_name
)
POSTHOOK: type: QUERY
POSTHOOK: Input: default@part_orc
#### A masked pattern was here ####
Manufacturer#1 almond antique burnished rose metallic 2 1 1 2 0
Manufacturer#1 almond antique burnished rose metallic 2 1 1 2 0
Manufacturer#1 almond antique chartreuse lavender yellow 34 3 2 34 32
Manufacturer#1 almond antique salmon chartreuse burlywood 6 4 3 6 -28
Manufacturer#1 almond aquamarine burnished black steel 28 5 4 28 22
Manufacturer#1 almond aquamarine pink moccasin thistle 42 6 5 42 14
Manufacturer#2 almond antique violet chocolate turquoise 14 1 1 14 0
Manufacturer#2 almond antique violet turquoise frosted 40 2 2 40 26
Manufacturer#2 almond aquamarine midnight light salmon 2 3 3 2 -38
Manufacturer#2 almond aquamarine rose maroon antique 25 4 4 25 23
Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 5 5 18 -7
Manufacturer#3 almond antique chartreuse khaki white 17 1 1 17 0
Manufacturer#3 almond antique forest lavender goldenrod 14 2 2 14 -3
Manufacturer#3 almond antique metallic orange dim 19 3 3 19 5
Manufacturer#3 almond antique misty red olive 1 4 4 1 -18
Manufacturer#3 almond antique olive coral navajo 45 5 5 45 44
Manufacturer#4 almond antique gainsboro frosted violet 10 1 1 10 0
Manufacturer#4 almond antique violet mint lemon 39 2 2 39 29
Manufacturer#4 almond aquamarine floral ivory bisque 27 3 3 27 -12
Manufacturer#4 almond aquamarine yellow dodger mint 7 4 4 7 -20
Manufacturer#4 almond azure aquamarine papaya violet 12 5 5 12 5
Manufacturer#5 almond antique blue firebrick mint 31 1 1 31 0
Manufacturer#5 almond antique medium spring khaki 6 2 2 6 -25
Manufacturer#5 almond antique sky peru orange 2 3 3 2 -4
Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 46 44
Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 23 -23
PREHOOK: query: explain vectorization detail
select p_mfgr, p_name, p_size,
rank() over (partition by p_mfgr order by p_name) as r,
dense_rank() over (partition by p_mfgr order by p_name) as dr,
p_size, p_size - lag(p_size,1,p_size) over (partition by p_mfgr order by p_name) as deltaSz
from noop(on part_orc
partition by p_mfgr
order by p_name
)
group by p_mfgr, p_name, p_size
PREHOOK: type: QUERY
PREHOOK: Input: default@part_orc
#### A masked pattern was here ####
POSTHOOK: query: explain vectorization detail
select p_mfgr, p_name, p_size,
rank() over (partition by p_mfgr order by p_name) as r,
dense_rank() over (partition by p_mfgr order by p_name) as dr,
p_size, p_size - lag(p_size,1,p_size) over (partition by p_mfgr order by p_name) as deltaSz
from noop(on part_orc
partition by p_mfgr
order by p_name
)
group by p_mfgr, p_name, p_size
POSTHOOK: type: QUERY
POSTHOOK: Input: default@part_orc
#### A masked pattern was here ####
PLAN VECTORIZATION:
enabled: true
enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
STAGE PLANS:
Stage: Stage-1
Tez
#### A masked pattern was here ####
Edges:
Reducer 2 <- Map 1 (SIMPLE_EDGE)
Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
TableScan
alias: part_orc
Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
TableScan Vectorization:
native: true
vectorizationSchemaColumns: [0:p_partkey:int, 1:p_name:string, 2:p_mfgr:string, 3:p_brand:string, 4:p_type:string, 5:p_size:int, 6:p_container:string, 7:p_retailprice:double, 8:p_comment:string, 9:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>, 10:ROW__IS__DELETED:boolean]
Reduce Output Operator
key expressions: p_mfgr (type: string), p_name (type: string)
null sort order: az
sort order: ++
Map-reduce partition columns: p_mfgr (type: string)
Reduce Sink Vectorization:
className: VectorReduceSinkObjectHashOperator
keyColumns: 2:string, 1:string
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
partitionColumns: 2:string
valueColumns: 5:int
Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: p_size (type: int)
Execution mode: vectorized, llap
LLAP IO: all inputs
Map Vectorization:
enabled: true
enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
inputFormatFeatureSupport: [DECIMAL_64]
featureSupportInUse: [DECIMAL_64]
inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
allNative: true
usesVectorUDFAdaptor: false
vectorized: true
rowBatchContext:
dataColumnCount: 9
includeColumns: [1, 2, 5]
dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string
partitionColumnCount: 0
scratchColumnTypeNames: []
Reducer 2
Execution mode: llap
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez] IS true
notVectorizedReason: PTF operator: NOOP not supported
vectorized: false
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int)
outputColumnNames: _col1, _col2, _col5
Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
PTF Operator
Function definitions:
Input definition
input alias: part_orc
output shape: _col1: string, _col2: string, _col5: int
type: TABLE
Partition table definition
input alias: ptf_1
name: noop
order by: _col1 ASC NULLS LAST
output shape: _col1: string, _col2: string, _col5: int
partition by: _col2
raw input shape:
Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int)
outputColumnNames: _col1, _col2, _col5
Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
keys: _col2 (type: string), _col1 (type: string), _col5 (type: int)
minReductionHashAggr: 0.4
mode: hash
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 25 Data size: 5575 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int)
null sort order: azz
sort order: +++
Map-reduce partition columns: _col0 (type: string)
Statistics: Num rows: 25 Data size: 5575 Basic stats: COMPLETE Column stats: COMPLETE
Reducer 3
Execution mode: llap
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez] IS true
notVectorizedReason: PTF operator: Only PTF directly under reduce-shuffle is supported
vectorized: false
Reduce Operator Tree:
Group By Operator
keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 25 Data size: 5575 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int)
outputColumnNames: _col0, _col1, _col2
PTF Operator
Function definitions:
Input definition
input alias: ptf_0
output shape: _col0: string, _col1: string, _col2: int
type: WINDOWING
Windowing table definition
input alias: ptf_1
name: windowingtablefunction
order by: _col1 ASC NULLS LAST
partition by: _col0
raw input shape:
window functions:
window function definition
alias: rank_window_0
arguments: _col1
name: rank
window function: GenericUDAFRankEvaluator
window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
alias: dense_rank_window_1
arguments: _col1
name: dense_rank
window function: GenericUDAFDenseRankEvaluator
window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
alias: lag_window_2
arguments: _col2, 1, _col2
name: lag
window function: GenericUDAFLagEvaluator
window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
Statistics: Num rows: 25 Data size: 5575 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col2 (type: int), (_col2 - lag_window_2) (type: int)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
Statistics: Num rows: 25 Data size: 5975 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
Statistics: Num rows: 25 Data size: 5975 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
Fetch Operator
limit: -1
Processor Tree:
ListSink
PREHOOK: query: select p_mfgr, p_name, p_size,
rank() over (partition by p_mfgr order by p_name) as r,
dense_rank() over (partition by p_mfgr order by p_name) as dr,
p_size, p_size - lag(p_size,1,p_size) over (partition by p_mfgr order by p_name) as deltaSz
from noop(on part_orc
partition by p_mfgr
order by p_name
)
group by p_mfgr, p_name, p_size
PREHOOK: type: QUERY
PREHOOK: Input: default@part_orc
#### A masked pattern was here ####
POSTHOOK: query: select p_mfgr, p_name, p_size,
rank() over (partition by p_mfgr order by p_name) as r,
dense_rank() over (partition by p_mfgr order by p_name) as dr,
p_size, p_size - lag(p_size,1,p_size) over (partition by p_mfgr order by p_name) as deltaSz
from noop(on part_orc
partition by p_mfgr
order by p_name
)
group by p_mfgr, p_name, p_size
POSTHOOK: type: QUERY
POSTHOOK: Input: default@part_orc
#### A masked pattern was here ####
Manufacturer#1 almond antique burnished rose metallic 2 1 1 2 0
Manufacturer#1 almond antique chartreuse lavender yellow 34 2 2 34 32
Manufacturer#1 almond antique salmon chartreuse burlywood 6 3 3 6 -28
Manufacturer#1 almond aquamarine burnished black steel 28 4 4 28 22
Manufacturer#1 almond aquamarine pink moccasin thistle 42 5 5 42 14
Manufacturer#2 almond antique violet chocolate turquoise 14 1 1 14 0
Manufacturer#2 almond antique violet turquoise frosted 40 2 2 40 26
Manufacturer#2 almond aquamarine midnight light salmon 2 3 3 2 -38
Manufacturer#2 almond aquamarine rose maroon antique 25 4 4 25 23
Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 5 5 18 -7
Manufacturer#3 almond antique chartreuse khaki white 17 1 1 17 0
Manufacturer#3 almond antique forest lavender goldenrod 14 2 2 14 -3
Manufacturer#3 almond antique metallic orange dim 19 3 3 19 5
Manufacturer#3 almond antique misty red olive 1 4 4 1 -18
Manufacturer#3 almond antique olive coral navajo 45 5 5 45 44
Manufacturer#4 almond antique gainsboro frosted violet 10 1 1 10 0
Manufacturer#4 almond antique violet mint lemon 39 2 2 39 29
Manufacturer#4 almond aquamarine floral ivory bisque 27 3 3 27 -12
Manufacturer#4 almond aquamarine yellow dodger mint 7 4 4 7 -20
Manufacturer#4 almond azure aquamarine papaya violet 12 5 5 12 5
Manufacturer#5 almond antique blue firebrick mint 31 1 1 31 0
Manufacturer#5 almond antique medium spring khaki 6 2 2 6 -25
Manufacturer#5 almond antique sky peru orange 2 3 3 2 -4
Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 46 44
Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 23 -23
PREHOOK: query: explain vectorization detail
select abc.*
from noop(on part_orc
partition by p_mfgr
order by p_name
) abc join part_orc p1 on abc.p_partkey = p1.p_partkey
PREHOOK: type: QUERY
PREHOOK: Input: default@part_orc
#### A masked pattern was here ####
POSTHOOK: query: explain vectorization detail
select abc.*
from noop(on part_orc
partition by p_mfgr
order by p_name
) abc join part_orc p1 on abc.p_partkey = p1.p_partkey
POSTHOOK: type: QUERY
POSTHOOK: Input: default@part_orc
#### A masked pattern was here ####
PLAN VECTORIZATION:
enabled: true
enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
STAGE PLANS:
Stage: Stage-1
Tez
#### A masked pattern was here ####
Edges:
Reducer 2 <- Map 1 (SIMPLE_EDGE)
Reducer 3 <- Map 1 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
TableScan
alias: part_orc
Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE
TableScan Vectorization:
native: true
vectorizationSchemaColumns: [0:p_partkey:int, 1:p_name:string, 2:p_mfgr:string, 3:p_brand:string, 4:p_type:string, 5:p_size:int, 6:p_container:string, 7:p_retailprice:double, 8:p_comment:string, 9:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>, 10:ROW__IS__DELETED:boolean]
Reduce Output Operator
key expressions: p_mfgr (type: string), p_name (type: string)
null sort order: az
sort order: ++
Map-reduce partition columns: p_mfgr (type: string)
Reduce Sink Vectorization:
className: VectorReduceSinkObjectHashOperator
keyColumns: 2:string, 1:string
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
partitionColumns: 2:string
valueColumns: 0:int, 3:string, 4:string, 5:int, 6:string, 7:double, 8:string
Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: p_partkey (type: int), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string)
Filter Operator
Filter Vectorization:
className: VectorFilterOperator
native: true
predicateExpression: SelectColumnIsNotNull(col 0:int)
predicate: p_partkey is not null (type: boolean)
Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: p_partkey (type: int)
null sort order: z
sort order: +
Map-reduce partition columns: p_partkey (type: int)
Reduce Sink Vectorization:
className: VectorReduceSinkLongOperator
keyColumns: 0:int
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE
Execution mode: vectorized, llap
LLAP IO: all inputs
Map Vectorization:
enabled: true
enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
inputFormatFeatureSupport: [DECIMAL_64]
featureSupportInUse: [DECIMAL_64]
inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
allNative: true
usesVectorUDFAdaptor: false
vectorized: true
rowBatchContext:
dataColumnCount: 9
includeColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8]
dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string
partitionColumnCount: 0
scratchColumnTypeNames: []
Reducer 2
Execution mode: llap
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez] IS true
notVectorizedReason: PTF operator: NOOP not supported
vectorized: false
Reduce Operator Tree:
Select Operator
expressions: VALUE._col0 (type: int), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: string), VALUE._col3 (type: int), VALUE._col4 (type: string), VALUE._col5 (type: double), VALUE._col6 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE
PTF Operator
Function definitions:
Input definition
input alias: part_orc
output shape: _col0: int, _col1: string, _col2: string, _col3: string, _col4: string, _col5: int, _col6: string, _col7: double, _col8: string
type: TABLE
Partition table definition
input alias: abc
name: noop
order by: _col1 ASC NULLS LAST
output shape: _col0: int, _col1: string, _col2: string, _col3: string, _col4: string, _col5: int, _col6: string, _col7: double, _col8: string
partition by: _col2
raw input shape:
Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: _col0 is not null (type: boolean)
Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int)
null sort order: z
sort order: +
Map-reduce partition columns: _col0 (type: int)
Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string)
Reducer 3
Execution mode: llap
Reduce Operator Tree:
Merge Join Operator
condition map:
Inner Join 0 to 1
keys:
0 _col0 (type: int)
1 p_partkey (type: int)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
Statistics: Num rows: 27 Data size: 16713 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
Statistics: Num rows: 27 Data size: 16713 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
MergeJoin Vectorization:
enabled: false
enableConditionsNotMet: Vectorizing MergeJoin Supported IS false
Stage: Stage-0
Fetch Operator
limit: -1
Processor Tree:
ListSink
PREHOOK: query: select abc.*
from noop(on part_orc
partition by p_mfgr
order by p_name
) abc join part_orc p1 on abc.p_partkey = p1.p_partkey
PREHOOK: type: QUERY
PREHOOK: Input: default@part_orc
#### A masked pattern was here ####
POSTHOOK: query: select abc.*
from noop(on part_orc
partition by p_mfgr
order by p_name
) abc join part_orc p1 on abc.p_partkey = p1.p_partkey
POSTHOOK: type: QUERY
POSTHOOK: Input: default@part_orc
#### A masked pattern was here ####
105685 almond antique violet chocolate turquoise Manufacturer#2 Brand#22 MEDIUM ANODIZED COPPER 14 MED CAN 1690.68 ly pending requ
110592 almond antique salmon chartreuse burlywood Manufacturer#1 Brand#15 PROMO BURNISHED NICKEL 6 JUMBO PKG 1602.59 to the furiously
112398 almond antique metallic orange dim Manufacturer#3 Brand#32 MEDIUM BURNISHED BRASS 19 JUMBO JAR 1410.39 ole car
121152 almond antique burnished rose metallic Manufacturer#1 Brand#14 PROMO PLATED TIN 2 JUMBO BOX 1173.15 e pinto beans h
121152 almond antique burnished rose metallic Manufacturer#1 Brand#14 PROMO PLATED TIN 2 JUMBO BOX 1173.15 e pinto beans h
121152 almond antique burnished rose metallic Manufacturer#1 Brand#14 PROMO PLATED TIN 2 JUMBO BOX 1173.15 e pinto beans h
121152 almond antique burnished rose metallic Manufacturer#1 Brand#14 PROMO PLATED TIN 2 JUMBO BOX 1173.15 e pinto beans h
132666 almond aquamarine rose maroon antique Manufacturer#2 Brand#24 SMALL POLISHED NICKEL 25 MED BOX 1698.66 even
144293 almond antique olive coral navajo Manufacturer#3 Brand#34 STANDARD POLISHED STEEL 45 JUMBO CAN 1337.29 ag furiously about
146985 almond aquamarine midnight light salmon Manufacturer#2 Brand#23 MEDIUM BURNISHED COPPER 2 SM CASE 2031.98 s cajole caref
15103 almond aquamarine dodger light gainsboro Manufacturer#5 Brand#53 ECONOMY BURNISHED STEEL 46 LG PACK 1018.1 packages hinder carefu
155733 almond antique sky peru orange Manufacturer#5 Brand#53 SMALL PLATED BRASS 2 WRAP DRUM 1788.73 furiously. bra
17273 almond antique forest lavender goldenrod Manufacturer#3 Brand#35 PROMO ANODIZED TIN 14 JUMBO CASE 1190.27 along the
17927 almond aquamarine yellow dodger mint Manufacturer#4 Brand#41 ECONOMY BRUSHED COPPER 7 SM PKG 1844.92 ites. eve
191709 almond antique violet turquoise frosted Manufacturer#2 Brand#22 ECONOMY POLISHED STEEL 40 MED BOX 1800.7 haggle
192697 almond antique blue firebrick mint Manufacturer#5 Brand#52 MEDIUM BURNISHED TIN 31 LG DRUM 1789.69 ickly ir
195606 almond aquamarine sandy cyan gainsboro Manufacturer#2 Brand#25 STANDARD PLATED TIN 18 SM PKG 1701.6 ic de
33357 almond azure aquamarine papaya violet Manufacturer#4 Brand#41 STANDARD ANODIZED TIN 12 WRAP CASE 1290.35 reful
40982 almond antique misty red olive Manufacturer#3 Brand#32 ECONOMY PLATED COPPER 1 LG PKG 1922.98 c foxes can s
42669 almond antique medium spring khaki Manufacturer#5 Brand#51 STANDARD BURNISHED TIN 6 MED CAN 1611.66 sits haggl
45261 almond aquamarine floral ivory bisque Manufacturer#4 Brand#42 SMALL PLATED STEEL 27 WRAP CASE 1206.26 careful
48427 almond antique violet mint lemon Manufacturer#4 Brand#42 PROMO POLISHED STEEL 39 SM CASE 1375.42 hely ironic i
49671 almond antique gainsboro frosted violet Manufacturer#4 Brand#41 SMALL BRUSHED BRASS 10 SM BOX 1620.67 ccounts run quick
65667 almond aquamarine pink moccasin thistle Manufacturer#1 Brand#12 LARGE BURNISHED STEEL 42 JUMBO CASE 1632.66 e across the expr
78486 almond azure blanched chiffon midnight Manufacturer#5 Brand#52 LARGE BRUSHED BRASS 23 MED BAG 1464.48 hely blith
85768 almond antique chartreuse lavender yellow Manufacturer#1 Brand#12 LARGE BRUSHED STEEL 34 SM BAG 1753.76 refull
86428 almond aquamarine burnished black steel Manufacturer#1 Brand#12 STANDARD ANODIZED STEEL 28 WRAP BAG 1414.42 arefully
90681 almond antique chartreuse khaki white Manufacturer#3 Brand#31 MEDIUM BURNISHED TIN 17 SM CASE 1671.68 are slyly after the sl
PREHOOK: query: explain vectorization detail
select abc.*
from part_orc p1 join noop(on part_orc
partition by p_mfgr
order by p_name
) abc on abc.p_partkey = p1.p_partkey
PREHOOK: type: QUERY
PREHOOK: Input: default@part_orc
#### A masked pattern was here ####
POSTHOOK: query: explain vectorization detail
select abc.*
from part_orc p1 join noop(on part_orc
partition by p_mfgr
order by p_name
) abc on abc.p_partkey = p1.p_partkey
POSTHOOK: type: QUERY
POSTHOOK: Input: default@part_orc
#### A masked pattern was here ####
PLAN VECTORIZATION:
enabled: true
enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
STAGE PLANS:
Stage: Stage-1
Tez
#### A masked pattern was here ####
Edges:
Reducer 2 <- Map 1 (SIMPLE_EDGE)
Reducer 3 <- Map 1 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
TableScan
alias: part_orc
Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE
TableScan Vectorization:
native: true
vectorizationSchemaColumns: [0:p_partkey:int, 1:p_name:string, 2:p_mfgr:string, 3:p_brand:string, 4:p_type:string, 5:p_size:int, 6:p_container:string, 7:p_retailprice:double, 8:p_comment:string, 9:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>, 10:ROW__IS__DELETED:boolean]
Reduce Output Operator
key expressions: p_mfgr (type: string), p_name (type: string)
null sort order: az
sort order: ++
Map-reduce partition columns: p_mfgr (type: string)
Reduce Sink Vectorization:
className: VectorReduceSinkObjectHashOperator
keyColumns: 2:string, 1:string
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
partitionColumns: 2:string
valueColumns: 0:int, 3:string, 4:string, 5:int, 6:string, 7:double, 8:string
Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: p_partkey (type: int), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string)
Filter Operator
Filter Vectorization:
className: VectorFilterOperator
native: true
predicateExpression: SelectColumnIsNotNull(col 0:int)
predicate: p_partkey is not null (type: boolean)
Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: p_partkey (type: int)
null sort order: z
sort order: +
Map-reduce partition columns: p_partkey (type: int)
Reduce Sink Vectorization:
className: VectorReduceSinkLongOperator
keyColumns: 0:int
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE
Execution mode: vectorized, llap
LLAP IO: all inputs
Map Vectorization:
enabled: true
enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
inputFormatFeatureSupport: [DECIMAL_64]
featureSupportInUse: [DECIMAL_64]
inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
allNative: true
usesVectorUDFAdaptor: false
vectorized: true
rowBatchContext:
dataColumnCount: 9
includeColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8]
dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string
partitionColumnCount: 0
scratchColumnTypeNames: []
Reducer 2
Execution mode: llap
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez] IS true
notVectorizedReason: PTF operator: NOOP not supported
vectorized: false
Reduce Operator Tree:
Select Operator
expressions: VALUE._col0 (type: int), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: string), VALUE._col3 (type: int), VALUE._col4 (type: string), VALUE._col5 (type: double), VALUE._col6 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE
PTF Operator
Function definitions:
Input definition
input alias: part_orc
output shape: _col0: int, _col1: string, _col2: string, _col3: string, _col4: string, _col5: int, _col6: string, _col7: double, _col8: string
type: TABLE
Partition table definition
input alias: abc
name: noop
order by: _col1 ASC NULLS LAST
output shape: _col0: int, _col1: string, _col2: string, _col3: string, _col4: string, _col5: int, _col6: string, _col7: double, _col8: string
partition by: _col2
raw input shape:
Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: _col0 is not null (type: boolean)
Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int)
null sort order: z
sort order: +
Map-reduce partition columns: _col0 (type: int)
Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string)
Reducer 3
Execution mode: llap
Reduce Operator Tree:
Merge Join Operator
condition map:
Inner Join 0 to 1
keys:
0 p_partkey (type: int)
1 _col0 (type: int)
outputColumnNames: _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21
Statistics: Num rows: 27 Data size: 16713 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col13 (type: int), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: string), _col18 (type: int), _col19 (type: string), _col20 (type: double), _col21 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
Statistics: Num rows: 27 Data size: 16713 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
Statistics: Num rows: 27 Data size: 16713 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
MergeJoin Vectorization:
enabled: false
enableConditionsNotMet: Vectorizing MergeJoin Supported IS false
Stage: Stage-0
Fetch Operator
limit: -1
Processor Tree:
ListSink
PREHOOK: query: select abc.*
from part_orc p1 join noop(on part_orc
partition by p_mfgr
order by p_name
) abc on abc.p_partkey = p1.p_partkey
PREHOOK: type: QUERY
PREHOOK: Input: default@part_orc
#### A masked pattern was here ####
POSTHOOK: query: select abc.*
from part_orc p1 join noop(on part_orc
partition by p_mfgr
order by p_name
) abc on abc.p_partkey = p1.p_partkey
POSTHOOK: type: QUERY
POSTHOOK: Input: default@part_orc
#### A masked pattern was here ####
105685 almond antique violet chocolate turquoise Manufacturer#2 Brand#22 MEDIUM ANODIZED COPPER 14 MED CAN 1690.68 ly pending requ
110592 almond antique salmon chartreuse burlywood Manufacturer#1 Brand#15 PROMO BURNISHED NICKEL 6 JUMBO PKG 1602.59 to the furiously
112398 almond antique metallic orange dim Manufacturer#3 Brand#32 MEDIUM BURNISHED BRASS 19 JUMBO JAR 1410.39 ole car
121152 almond antique burnished rose metallic Manufacturer#1 Brand#14 PROMO PLATED TIN 2 JUMBO BOX 1173.15 e pinto beans h
121152 almond antique burnished rose metallic Manufacturer#1 Brand#14 PROMO PLATED TIN 2 JUMBO BOX 1173.15 e pinto beans h
121152 almond antique burnished rose metallic Manufacturer#1 Brand#14 PROMO PLATED TIN 2 JUMBO BOX 1173.15 e pinto beans h
121152 almond antique burnished rose metallic Manufacturer#1 Brand#14 PROMO PLATED TIN 2 JUMBO BOX 1173.15 e pinto beans h
132666 almond aquamarine rose maroon antique Manufacturer#2 Brand#24 SMALL POLISHED NICKEL 25 MED BOX 1698.66 even
144293 almond antique olive coral navajo Manufacturer#3 Brand#34 STANDARD POLISHED STEEL 45 JUMBO CAN 1337.29 ag furiously about
146985 almond aquamarine midnight light salmon Manufacturer#2 Brand#23 MEDIUM BURNISHED COPPER 2 SM CASE 2031.98 s cajole caref
15103 almond aquamarine dodger light gainsboro Manufacturer#5 Brand#53 ECONOMY BURNISHED STEEL 46 LG PACK 1018.1 packages hinder carefu
155733 almond antique sky peru orange Manufacturer#5 Brand#53 SMALL PLATED BRASS 2 WRAP DRUM 1788.73 furiously. bra
17273 almond antique forest lavender goldenrod Manufacturer#3 Brand#35 PROMO ANODIZED TIN 14 JUMBO CASE 1190.27 along the
17927 almond aquamarine yellow dodger mint Manufacturer#4 Brand#41 ECONOMY BRUSHED COPPER 7 SM PKG 1844.92 ites. eve
191709 almond antique violet turquoise frosted Manufacturer#2 Brand#22 ECONOMY POLISHED STEEL 40 MED BOX 1800.7 haggle
192697 almond antique blue firebrick mint Manufacturer#5 Brand#52 MEDIUM BURNISHED TIN 31 LG DRUM 1789.69 ickly ir
195606 almond aquamarine sandy cyan gainsboro Manufacturer#2 Brand#25 STANDARD PLATED TIN 18 SM PKG 1701.6 ic de
33357 almond azure aquamarine papaya violet Manufacturer#4 Brand#41 STANDARD ANODIZED TIN 12 WRAP CASE 1290.35 reful
40982 almond antique misty red olive Manufacturer#3 Brand#32 ECONOMY PLATED COPPER 1 LG PKG 1922.98 c foxes can s
42669 almond antique medium spring khaki Manufacturer#5 Brand#51 STANDARD BURNISHED TIN 6 MED CAN 1611.66 sits haggl
45261 almond aquamarine floral ivory bisque Manufacturer#4 Brand#42 SMALL PLATED STEEL 27 WRAP CASE 1206.26 careful
48427 almond antique violet mint lemon Manufacturer#4 Brand#42 PROMO POLISHED STEEL 39 SM CASE 1375.42 hely ironic i
49671 almond antique gainsboro frosted violet Manufacturer#4 Brand#41 SMALL BRUSHED BRASS 10 SM BOX 1620.67 ccounts run quick
65667 almond aquamarine pink moccasin thistle Manufacturer#1 Brand#12 LARGE BURNISHED STEEL 42 JUMBO CASE 1632.66 e across the expr
78486 almond azure blanched chiffon midnight Manufacturer#5 Brand#52 LARGE BRUSHED BRASS 23 MED BAG 1464.48 hely blith
85768 almond antique chartreuse lavender yellow Manufacturer#1 Brand#12 LARGE BRUSHED STEEL 34 SM BAG 1753.76 refull
86428 almond aquamarine burnished black steel Manufacturer#1 Brand#12 STANDARD ANODIZED STEEL 28 WRAP BAG 1414.42 arefully
90681 almond antique chartreuse khaki white Manufacturer#3 Brand#31 MEDIUM BURNISHED TIN 17 SM CASE 1671.68 are slyly after the sl
PREHOOK: query: explain vectorization detail
select p_mfgr, p_name, p_size,
rank() over (partition by p_mfgr order by p_name, p_size desc) as r
from noopwithmap(on part_orc
partition by p_mfgr
order by p_name, p_size desc)
PREHOOK: type: QUERY
PREHOOK: Input: default@part_orc
#### A masked pattern was here ####
POSTHOOK: query: explain vectorization detail
select p_mfgr, p_name, p_size,
rank() over (partition by p_mfgr order by p_name, p_size desc) as r
from noopwithmap(on part_orc
partition by p_mfgr
order by p_name, p_size desc)
POSTHOOK: type: QUERY
POSTHOOK: Input: default@part_orc
#### A masked pattern was here ####
PLAN VECTORIZATION:
enabled: true
enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
STAGE PLANS:
Stage: Stage-1
Tez
#### A masked pattern was here ####
Edges:
Reducer 2 <- Map 1 (SIMPLE_EDGE)
Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
TableScan
alias: part_orc
Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
PTF Operator
Function definitions:
Input definition
input alias: part_orc
output shape: p_name: string, p_mfgr: string, p_size: int
type: TABLE
Partition table definition
input alias: ptf_1
name: noopwithmap
order by: p_name ASC NULLS LAST, p_size DESC NULLS FIRST
output shape: p_name: string, p_mfgr: string, p_size: int
partition by: p_mfgr
raw input shape:
transforms raw input: true
Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
Map-side function: true
Reduce Output Operator
key expressions: p_mfgr (type: string), p_name (type: string), p_size (type: int)
null sort order: aza
sort order: ++-
Map-reduce partition columns: p_mfgr (type: string)
Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
Execution mode: llap
LLAP IO: all inputs
Map Vectorization:
enabled: true
enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
notVectorizedReason: PTF operator: PTF Mapper not supported
vectorized: false
Reducer 2
Execution mode: llap
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez] IS true
notVectorizedReason: PTF operator: NOOP not supported
vectorized: false
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey2 (type: int)
outputColumnNames: _col1, _col2, _col5
Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
PTF Operator
Function definitions:
Input definition
input alias: part_orc
output shape: _col1: string, _col2: string, _col5: int
type: TABLE
Partition table definition
input alias: ptf_1
name: noopwithmap
order by: _col1 ASC NULLS LAST, _col5 DESC NULLS FIRST
output shape: _col1: string, _col2: string, _col5: int
partition by: _col2
raw input shape:
transforms raw input: true
Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int)
null sort order: aza
sort order: ++-
Map-reduce partition columns: _col2 (type: string)
Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
Reducer 3
Execution mode: vectorized, llap
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez] IS true
reduceColumnNullOrder: aza
reduceColumnSortOrder: ++-
allNative: false
usesVectorUDFAdaptor: false
vectorized: true
rowBatchContext:
dataColumnCount: 3
dataColumns: KEY.reducesinkkey0:string, KEY.reducesinkkey1:string, KEY.reducesinkkey2:int
partitionColumnCount: 0
scratchColumnTypeNames: [bigint]
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey2 (type: int)
outputColumnNames: _col1, _col2, _col5
Select Vectorization:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [1, 0, 2]
Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
PTF Operator
Function definitions:
Input definition
input alias: ptf_0
output shape: _col1: string, _col2: string, _col5: int
type: WINDOWING
Windowing table definition
input alias: ptf_1
name: windowingtablefunction
order by: _col1 ASC NULLS LAST, _col5 DESC NULLS FIRST
partition by: _col2
raw input shape:
window functions:
window function definition
alias: rank_window_0
arguments: _col1, _col5
name: rank
window function: GenericUDAFRankEvaluator
window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
PTF Vectorization:
allEvaluatorsAreStreaming: true
className: VectorPTFOperator
evaluatorClasses: [VectorPTFEvaluatorRank]
functionInputExpressions: [col 1:string]
functionNames: [rank]
keyInputColumns: [1, 0, 2]
native: true
nonKeyInputColumns: []
orderExpressions: [col 1:string, col 2:int]
outputColumns: [3, 1, 0, 2]
outputTypes: [int, string, string, int]
partitionExpressions: [col 0:string]
streamingColumns: [3]
Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int)
outputColumnNames: _col0, _col1, _col2, _col3
Select Vectorization:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [0, 1, 2, 3]
Statistics: Num rows: 26 Data size: 5902 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
Statistics: Num rows: 26 Data size: 5902 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
Fetch Operator
limit: -1
Processor Tree:
ListSink
PREHOOK: query: select p_mfgr, p_name, p_size,
rank() over (partition by p_mfgr order by p_name, p_size desc) as r
from noopwithmap(on part_orc
partition by p_mfgr
order by p_name, p_size desc)
PREHOOK: type: QUERY
PREHOOK: Input: default@part_orc
#### A masked pattern was here ####
POSTHOOK: query: select p_mfgr, p_name, p_size,
rank() over (partition by p_mfgr order by p_name, p_size desc) as r
from noopwithmap(on part_orc
partition by p_mfgr
order by p_name, p_size desc)
POSTHOOK: type: QUERY
POSTHOOK: Input: default@part_orc
#### A masked pattern was here ####
Manufacturer#1 almond antique burnished rose metallic 2 1
Manufacturer#1 almond antique burnished rose metallic 2 1
Manufacturer#1 almond antique chartreuse lavender yellow 34 3
Manufacturer#1 almond antique salmon chartreuse burlywood 6 4
Manufacturer#1 almond aquamarine burnished black steel 28 5
Manufacturer#1 almond aquamarine pink moccasin thistle 42 6
Manufacturer#2 almond antique violet chocolate turquoise 14 1
Manufacturer#2 almond antique violet turquoise frosted 40 2
Manufacturer#2 almond aquamarine midnight light salmon 2 3
Manufacturer#2 almond aquamarine rose maroon antique 25 4
Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 5
Manufacturer#3 almond antique chartreuse khaki white 17 1
Manufacturer#3 almond antique forest lavender goldenrod 14 2
Manufacturer#3 almond antique metallic orange dim 19 3
Manufacturer#3 almond antique misty red olive 1 4
Manufacturer#3 almond antique olive coral navajo 45 5
Manufacturer#4 almond antique gainsboro frosted violet 10 1
Manufacturer#4 almond antique violet mint lemon 39 2
Manufacturer#4 almond aquamarine floral ivory bisque 27 3
Manufacturer#4 almond aquamarine yellow dodger mint 7 4
Manufacturer#4 almond azure aquamarine papaya violet 12 5
Manufacturer#5 almond antique blue firebrick mint 31 1
Manufacturer#5 almond antique medium spring khaki 6 2
Manufacturer#5 almond antique sky peru orange 2 3
Manufacturer#5 almond aquamarine dodger light gainsboro 46 4
Manufacturer#5 almond azure blanched chiffon midnight 23 5
PREHOOK: query: explain vectorization detail
select p_mfgr, p_name, p_size,
rank() over (partition by p_mfgr order by p_name) as r,
dense_rank() over (partition by p_mfgr order by p_name) as dr,
round(sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row),2) as s1
from noopwithmap(on part_orc
partition by p_mfgr
order by p_name)
PREHOOK: type: QUERY
PREHOOK: Input: default@part_orc
#### A masked pattern was here ####
POSTHOOK: query: explain vectorization detail
select p_mfgr, p_name, p_size,
rank() over (partition by p_mfgr order by p_name) as r,
dense_rank() over (partition by p_mfgr order by p_name) as dr,
round(sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row),2) as s1
from noopwithmap(on part_orc
partition by p_mfgr
order by p_name)
POSTHOOK: type: QUERY
POSTHOOK: Input: default@part_orc
#### A masked pattern was here ####
PLAN VECTORIZATION:
enabled: true
enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
STAGE PLANS:
Stage: Stage-1
Tez
#### A masked pattern was here ####
Edges:
Reducer 2 <- Map 1 (SIMPLE_EDGE)
Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
TableScan
alias: part_orc
Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE
PTF Operator
Function definitions:
Input definition
input alias: part_orc
output shape: p_name: string, p_mfgr: string, p_size: int, p_retailprice: double
type: TABLE
Partition table definition
input alias: ptf_1
name: noopwithmap
order by: p_name ASC NULLS LAST
output shape: p_name: string, p_mfgr: string, p_size: int, p_retailprice: double
partition by: p_mfgr
raw input shape:
transforms raw input: true
Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE
Map-side function: true
Reduce Output Operator
key expressions: p_mfgr (type: string), p_name (type: string)
null sort order: az
sort order: ++
Map-reduce partition columns: p_mfgr (type: string)
Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: p_size (type: int), p_retailprice (type: double)
Execution mode: llap
LLAP IO: all inputs
Map Vectorization:
enabled: true
enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
notVectorizedReason: PTF operator: PTF Mapper not supported
vectorized: false
Reducer 2
Execution mode: llap
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez] IS true
notVectorizedReason: PTF operator: NOOP not supported
vectorized: false
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double)
outputColumnNames: _col1, _col2, _col5, _col7
Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE
PTF Operator
Function definitions:
Input definition
input alias: part_orc
output shape: _col1: string, _col2: string, _col5: int, _col7: double
type: TABLE
Partition table definition
input alias: ptf_1
name: noopwithmap
order by: _col1 ASC NULLS LAST
output shape: _col1: string, _col2: string, _col5: int, _col7: double
partition by: _col2
raw input shape:
transforms raw input: true
Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col2 (type: string), _col1 (type: string)
null sort order: az
sort order: ++
Map-reduce partition columns: _col2 (type: string)
Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col5 (type: int), _col7 (type: double)
Reducer 3
Execution mode: vectorized, llap
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez] IS true
reduceColumnNullOrder: az
reduceColumnSortOrder: ++
allNative: false
usesVectorUDFAdaptor: false
vectorized: true
rowBatchContext:
dataColumnCount: 4
dataColumns: KEY.reducesinkkey0:string, KEY.reducesinkkey1:string, VALUE._col3:int, VALUE._col5:double
partitionColumnCount: 0
scratchColumnTypeNames: [bigint, bigint, double, double]
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double)
outputColumnNames: _col1, _col2, _col5, _col7
Select Vectorization:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [1, 0, 2, 3]
Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE
PTF Operator
Function definitions:
Input definition
input alias: ptf_0
output shape: _col1: string, _col2: string, _col5: int, _col7: double
type: WINDOWING
Windowing table definition
input alias: ptf_1
name: windowingtablefunction
order by: _col1 ASC NULLS LAST
partition by: _col2
raw input shape:
window functions:
window function definition
alias: rank_window_0
arguments: _col1
name: rank
window function: GenericUDAFRankEvaluator
window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
alias: dense_rank_window_1
arguments: _col1
name: dense_rank
window function: GenericUDAFDenseRankEvaluator
window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
alias: sum_window_2
arguments: _col7
name: sum
window function: GenericUDAFSumDouble
window frame: ROWS PRECEDING(MAX)~CURRENT
PTF Vectorization:
allEvaluatorsAreStreaming: true
className: VectorPTFOperator
evaluatorClasses: [VectorPTFEvaluatorRank, VectorPTFEvaluatorDenseRank, VectorPTFEvaluatorStreamingDoubleSum]
functionInputExpressions: [col 1:string, col 1:string, col 3:double]
functionNames: [rank, dense_rank, sum]
keyInputColumns: [1, 0]
native: true
nonKeyInputColumns: [2, 3]
orderExpressions: [col 1:string]
outputColumns: [4, 5, 6, 1, 0, 2, 3]
outputTypes: [int, int, double, string, string, int, double]
partitionExpressions: [col 0:string]
streamingColumns: [4, 5, 6]
Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), round(sum_window_2, 2) (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
Select Vectorization:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [0, 1, 2, 4, 5, 7]
selectExpressions: RoundWithNumDigitsDoubleToDouble(col 6, decimalPlaces 2) -> 7:double
Statistics: Num rows: 26 Data size: 6214 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
Statistics: Num rows: 26 Data size: 6214 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
Fetch Operator
limit: -1
Processor Tree:
ListSink
PREHOOK: query: select p_mfgr, p_name, p_size,
rank() over (partition by p_mfgr order by p_name) as r,
dense_rank() over (partition by p_mfgr order by p_name) as dr,
round(sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row),2) as s1
from noopwithmap(on part_orc
partition by p_mfgr
order by p_name)
PREHOOK: type: QUERY
PREHOOK: Input: default@part_orc
#### A masked pattern was here ####
POSTHOOK: query: select p_mfgr, p_name, p_size,
rank() over (partition by p_mfgr order by p_name) as r,
dense_rank() over (partition by p_mfgr order by p_name) as dr,
round(sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row),2) as s1
from noopwithmap(on part_orc
partition by p_mfgr
order by p_name)
POSTHOOK: type: QUERY
POSTHOOK: Input: default@part_orc
#### A masked pattern was here ####
Manufacturer#1 almond antique burnished rose metallic 2 1 1 1173.15
Manufacturer#1 almond antique burnished rose metallic 2 1 1 2346.3
Manufacturer#1 almond antique chartreuse lavender yellow 34 3 2 4100.06
Manufacturer#1 almond antique salmon chartreuse burlywood 6 4 3 5702.65
Manufacturer#1 almond aquamarine burnished black steel 28 5 4 7117.07
Manufacturer#1 almond aquamarine pink moccasin thistle 42 6 5 8749.73
Manufacturer#2 almond antique violet chocolate turquoise 14 1 1 1690.68
Manufacturer#2 almond antique violet turquoise frosted 40 2 2 3491.38
Manufacturer#2 almond aquamarine midnight light salmon 2 3 3 5523.36
Manufacturer#2 almond aquamarine rose maroon antique 25 4 4 7222.02
Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 5 5 8923.62
Manufacturer#3 almond antique chartreuse khaki white 17 1 1 1671.68
Manufacturer#3 almond antique forest lavender goldenrod 14 2 2 2861.95
Manufacturer#3 almond antique metallic orange dim 19 3 3 4272.34
Manufacturer#3 almond antique misty red olive 1 4 4 6195.32
Manufacturer#3 almond antique olive coral navajo 45 5 5 7532.61
Manufacturer#4 almond antique gainsboro frosted violet 10 1 1 1620.67
Manufacturer#4 almond antique violet mint lemon 39 2 2 2996.09
Manufacturer#4 almond aquamarine floral ivory bisque 27 3 3 4202.35
Manufacturer#4 almond aquamarine yellow dodger mint 7 4 4 6047.27
Manufacturer#4 almond azure aquamarine papaya violet 12 5 5 7337.62
Manufacturer#5 almond antique blue firebrick mint 31 1 1 1789.69
Manufacturer#5 almond antique medium spring khaki 6 2 2 3401.35
Manufacturer#5 almond antique sky peru orange 2 3 3 5190.08
Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 6208.18
Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 7672.66
PREHOOK: query: explain vectorization detail
select p_mfgr, p_name, p_size,
rank() over (partition by p_mfgr order by p_name) as r,
dense_rank() over (partition by p_mfgr order by p_name) as dr,
round(sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row),2) as s1
from noop(on part_orc
partition by p_mfgr
order by p_name)
PREHOOK: type: QUERY
PREHOOK: Input: default@part_orc
#### A masked pattern was here ####
POSTHOOK: query: explain vectorization detail
select p_mfgr, p_name, p_size,
rank() over (partition by p_mfgr order by p_name) as r,
dense_rank() over (partition by p_mfgr order by p_name) as dr,
round(sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row),2) as s1
from noop(on part_orc
partition by p_mfgr
order by p_name)
POSTHOOK: type: QUERY
POSTHOOK: Input: default@part_orc
#### A masked pattern was here ####
PLAN VECTORIZATION:
enabled: true
enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
STAGE PLANS:
Stage: Stage-1
Tez
#### A masked pattern was here ####
Edges:
Reducer 2 <- Map 1 (SIMPLE_EDGE)
Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
TableScan
alias: part_orc
Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE
TableScan Vectorization:
native: true
vectorizationSchemaColumns: [0:p_partkey:int, 1:p_name:string, 2:p_mfgr:string, 3:p_brand:string, 4:p_type:string, 5:p_size:int, 6:p_container:string, 7:p_retailprice:double, 8:p_comment:string, 9:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>, 10:ROW__IS__DELETED:boolean]
Reduce Output Operator
key expressions: p_mfgr (type: string), p_name (type: string)
null sort order: az
sort order: ++
Map-reduce partition columns: p_mfgr (type: string)
Reduce Sink Vectorization:
className: VectorReduceSinkObjectHashOperator
keyColumns: 2:string, 1:string
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
partitionColumns: 2:string
valueColumns: 5:int, 7:double
Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: p_size (type: int), p_retailprice (type: double)
Execution mode: vectorized, llap
LLAP IO: all inputs
Map Vectorization:
enabled: true
enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
inputFormatFeatureSupport: [DECIMAL_64]
featureSupportInUse: [DECIMAL_64]
inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
allNative: true
usesVectorUDFAdaptor: false
vectorized: true
rowBatchContext:
dataColumnCount: 9
includeColumns: [1, 2, 5, 7]
dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string
partitionColumnCount: 0
scratchColumnTypeNames: []
Reducer 2
Execution mode: llap
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez] IS true
notVectorizedReason: PTF operator: NOOP not supported
vectorized: false
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double)
outputColumnNames: _col1, _col2, _col5, _col7
Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE
PTF Operator
Function definitions:
Input definition
input alias: part_orc
output shape: _col1: string, _col2: string, _col5: int, _col7: double
type: TABLE
Partition table definition
input alias: ptf_1
name: noop
order by: _col1 ASC NULLS LAST
output shape: _col1: string, _col2: string, _col5: int, _col7: double
partition by: _col2
raw input shape:
Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col2 (type: string), _col1 (type: string)
null sort order: az
sort order: ++
Map-reduce partition columns: _col2 (type: string)
Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col5 (type: int), _col7 (type: double)
Reducer 3
Execution mode: vectorized, llap
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez] IS true
reduceColumnNullOrder: az
reduceColumnSortOrder: ++
allNative: false
usesVectorUDFAdaptor: false
vectorized: true
rowBatchContext:
dataColumnCount: 4
dataColumns: KEY.reducesinkkey0:string, KEY.reducesinkkey1:string, VALUE._col3:int, VALUE._col5:double
partitionColumnCount: 0
scratchColumnTypeNames: [bigint, bigint, double, double]
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double)
outputColumnNames: _col1, _col2, _col5, _col7
Select Vectorization:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [1, 0, 2, 3]
Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE
PTF Operator
Function definitions:
Input definition
input alias: ptf_0
output shape: _col1: string, _col2: string, _col5: int, _col7: double
type: WINDOWING
Windowing table definition
input alias: ptf_1
name: windowingtablefunction
order by: _col1 ASC NULLS LAST
partition by: _col2
raw input shape:
window functions:
window function definition
alias: rank_window_0
arguments: _col1
name: rank
window function: GenericUDAFRankEvaluator
window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
alias: dense_rank_window_1
arguments: _col1
name: dense_rank
window function: GenericUDAFDenseRankEvaluator
window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
alias: sum_window_2
arguments: _col7
name: sum
window function: GenericUDAFSumDouble
window frame: ROWS PRECEDING(MAX)~CURRENT
PTF Vectorization:
allEvaluatorsAreStreaming: true
className: VectorPTFOperator
evaluatorClasses: [VectorPTFEvaluatorRank, VectorPTFEvaluatorDenseRank, VectorPTFEvaluatorStreamingDoubleSum]
functionInputExpressions: [col 1:string, col 1:string, col 3:double]
functionNames: [rank, dense_rank, sum]
keyInputColumns: [1, 0]
native: true
nonKeyInputColumns: [2, 3]
orderExpressions: [col 1:string]
outputColumns: [4, 5, 6, 1, 0, 2, 3]
outputTypes: [int, int, double, string, string, int, double]
partitionExpressions: [col 0:string]
streamingColumns: [4, 5, 6]
Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), round(sum_window_2, 2) (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
Select Vectorization:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [0, 1, 2, 4, 5, 7]
selectExpressions: RoundWithNumDigitsDoubleToDouble(col 6, decimalPlaces 2) -> 7:double
Statistics: Num rows: 26 Data size: 6214 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
Statistics: Num rows: 26 Data size: 6214 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
Fetch Operator
limit: -1
Processor Tree:
ListSink
PREHOOK: query: select p_mfgr, p_name, p_size,
rank() over (partition by p_mfgr order by p_name) as r,
dense_rank() over (partition by p_mfgr order by p_name) as dr,
round(sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row),2) as s1
from noop(on part_orc
partition by p_mfgr
order by p_name)
PREHOOK: type: QUERY
PREHOOK: Input: default@part_orc
#### A masked pattern was here ####
POSTHOOK: query: select p_mfgr, p_name, p_size,
rank() over (partition by p_mfgr order by p_name) as r,
dense_rank() over (partition by p_mfgr order by p_name) as dr,
round(sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row),2) as s1
from noop(on part_orc
partition by p_mfgr
order by p_name)
POSTHOOK: type: QUERY
POSTHOOK: Input: default@part_orc
#### A masked pattern was here ####
Manufacturer#1 almond antique burnished rose metallic 2 1 1 1173.15
Manufacturer#1 almond antique burnished rose metallic 2 1 1 2346.3
Manufacturer#1 almond antique chartreuse lavender yellow 34 3 2 4100.06
Manufacturer#1 almond antique salmon chartreuse burlywood 6 4 3 5702.65
Manufacturer#1 almond aquamarine burnished black steel 28 5 4 7117.07
Manufacturer#1 almond aquamarine pink moccasin thistle 42 6 5 8749.73
Manufacturer#2 almond antique violet chocolate turquoise 14 1 1 1690.68
Manufacturer#2 almond antique violet turquoise frosted 40 2 2 3491.38
Manufacturer#2 almond aquamarine midnight light salmon 2 3 3 5523.36
Manufacturer#2 almond aquamarine rose maroon antique 25 4 4 7222.02
Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 5 5 8923.62
Manufacturer#3 almond antique chartreuse khaki white 17 1 1 1671.68
Manufacturer#3 almond antique forest lavender goldenrod 14 2 2 2861.95
Manufacturer#3 almond antique metallic orange dim 19 3 3 4272.34
Manufacturer#3 almond antique misty red olive 1 4 4 6195.32
Manufacturer#3 almond antique olive coral navajo 45 5 5 7532.61
Manufacturer#4 almond antique gainsboro frosted violet 10 1 1 1620.67
Manufacturer#4 almond antique violet mint lemon 39 2 2 2996.09
Manufacturer#4 almond aquamarine floral ivory bisque 27 3 3 4202.35
Manufacturer#4 almond aquamarine yellow dodger mint 7 4 4 6047.27
Manufacturer#4 almond azure aquamarine papaya violet 12 5 5 7337.62
Manufacturer#5 almond antique blue firebrick mint 31 1 1 1789.69
Manufacturer#5 almond antique medium spring khaki 6 2 2 3401.35
Manufacturer#5 almond antique sky peru orange 2 3 3 5190.08
Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 6208.18
Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 7672.66
PREHOOK: query: explain vectorization detail
select p_mfgr, p_name, p_size,
rank() over (partition by p_mfgr order by p_name) as r,
dense_rank() over (partition by p_mfgr order by p_name) as dr,
round(sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row),2) as s1
from noop(on noopwithmap(on noop(on part_orc
partition by p_mfgr
order by p_mfgr, p_name
)))
PREHOOK: type: QUERY
PREHOOK: Input: default@part_orc
#### A masked pattern was here ####
POSTHOOK: query: explain vectorization detail
select p_mfgr, p_name, p_size,
rank() over (partition by p_mfgr order by p_name) as r,
dense_rank() over (partition by p_mfgr order by p_name) as dr,
round(sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row),2) as s1
from noop(on noopwithmap(on noop(on part_orc
partition by p_mfgr
order by p_mfgr, p_name
)))
POSTHOOK: type: QUERY
POSTHOOK: Input: default@part_orc
#### A masked pattern was here ####
PLAN VECTORIZATION:
enabled: true
enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
STAGE PLANS:
Stage: Stage-1
Tez
#### A masked pattern was here ####
Edges:
Reducer 2 <- Map 1 (SIMPLE_EDGE)
Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
Reducer 4 <- Reducer 3 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
TableScan
alias: part_orc
Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE
TableScan Vectorization:
native: true
vectorizationSchemaColumns: [0:p_partkey:int, 1:p_name:string, 2:p_mfgr:string, 3:p_brand:string, 4:p_type:string, 5:p_size:int, 6:p_container:string, 7:p_retailprice:double, 8:p_comment:string, 9:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>, 10:ROW__IS__DELETED:boolean]
Reduce Output Operator
key expressions: p_mfgr (type: string), p_name (type: string)
null sort order: zz
sort order: ++
Map-reduce partition columns: p_mfgr (type: string)
Reduce Sink Vectorization:
className: VectorReduceSinkObjectHashOperator
keyColumns: 2:string, 1:string
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
partitionColumns: 2:string
valueColumns: 5:int, 7:double
Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: p_size (type: int), p_retailprice (type: double)
Execution mode: vectorized, llap
LLAP IO: all inputs
Map Vectorization:
enabled: true
enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
inputFormatFeatureSupport: [DECIMAL_64]
featureSupportInUse: [DECIMAL_64]
inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
allNative: true
usesVectorUDFAdaptor: false
vectorized: true
rowBatchContext:
dataColumnCount: 9
includeColumns: [1, 2, 5, 7]
dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string
partitionColumnCount: 0
scratchColumnTypeNames: []
Reducer 2
Execution mode: llap
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez] IS true
notVectorizedReason: PTF operator: NOOP not supported
vectorized: false
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double)
outputColumnNames: _col1, _col2, _col5, _col7
Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE
PTF Operator
Function definitions:
Input definition
input alias: part_orc
output shape: _col1: string, _col2: string, _col5: int, _col7: double
type: TABLE
Partition table definition
input alias: ptf_1
name: noop
order by: _col2 ASC NULLS LAST, _col1 ASC NULLS LAST
output shape: _col1: string, _col2: string, _col5: int, _col7: double
partition by: _col2
raw input shape:
Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE
PTF Operator
Function definitions:
Input definition
input alias: ptf_0
output shape: _col1: string, _col2: string, _col5: int, _col7: double
type: PTFCOMPONENT
Partition table definition
input alias: ptf_1
name: noopwithmap
order by: _col2 ASC NULLS LAST, _col1 ASC NULLS LAST
output shape: _col1: string, _col2: string, _col5: int, _col7: double
partition by: _col2
raw input shape:
transforms raw input: true
Partition table definition
input alias: ptf_2
name: noop
order by: _col2 ASC NULLS LAST, _col1 ASC NULLS LAST
output shape: _col1: string, _col2: string, _col5: int, _col7: double
partition by: _col2
raw input shape:
Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE
Map-side function: true
Reduce Output Operator
key expressions: _col2 (type: string), _col1 (type: string)
null sort order: zz
sort order: ++
Map-reduce partition columns: _col2 (type: string)
Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col5 (type: int), _col7 (type: double)
Reducer 3
Execution mode: llap
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez] IS true
notVectorizedReason: PTF operator: NOOP not supported
vectorized: false
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double)
outputColumnNames: _col1, _col2, _col5, _col7
Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE
PTF Operator
Function definitions:
Input definition
input alias: ptf_0
output shape: _col1: string, _col2: string, _col5: int, _col7: double
type: PTFCOMPONENT
Partition table definition
input alias: ptf_1
name: noopwithmap
order by: _col2 ASC NULLS LAST, _col1 ASC NULLS LAST
output shape: _col1: string, _col2: string, _col5: int, _col7: double
partition by: _col2
raw input shape:
transforms raw input: true
Partition table definition
input alias: ptf_2
name: noop
order by: _col2 ASC NULLS LAST, _col1 ASC NULLS LAST
output shape: _col1: string, _col2: string, _col5: int, _col7: double
partition by: _col2
raw input shape:
Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col2 (type: string), _col1 (type: string)
null sort order: az
sort order: ++
Map-reduce partition columns: _col2 (type: string)
Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col5 (type: int), _col7 (type: double)
Reducer 4
Execution mode: vectorized, llap
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez] IS true
reduceColumnNullOrder: az
reduceColumnSortOrder: ++
allNative: false
usesVectorUDFAdaptor: false
vectorized: true
rowBatchContext:
dataColumnCount: 4
dataColumns: KEY.reducesinkkey0:string, KEY.reducesinkkey1:string, VALUE._col3:int, VALUE._col5:double
partitionColumnCount: 0
scratchColumnTypeNames: [bigint, bigint, double, double]
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double)
outputColumnNames: _col1, _col2, _col5, _col7
Select Vectorization:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [1, 0, 2, 3]
Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE
PTF Operator
Function definitions:
Input definition
input alias: ptf_0
output shape: _col1: string, _col2: string, _col5: int, _col7: double
type: WINDOWING
Windowing table definition
input alias: ptf_1
name: windowingtablefunction
order by: _col1 ASC NULLS LAST
partition by: _col2
raw input shape:
window functions:
window function definition
alias: rank_window_0
arguments: _col1
name: rank
window function: GenericUDAFRankEvaluator
window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
alias: dense_rank_window_1
arguments: _col1
name: dense_rank
window function: GenericUDAFDenseRankEvaluator
window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
alias: sum_window_2
arguments: _col7
name: sum
window function: GenericUDAFSumDouble
window frame: ROWS PRECEDING(MAX)~CURRENT
PTF Vectorization:
allEvaluatorsAreStreaming: true
className: VectorPTFOperator
evaluatorClasses: [VectorPTFEvaluatorRank, VectorPTFEvaluatorDenseRank, VectorPTFEvaluatorStreamingDoubleSum]
functionInputExpressions: [col 1:string, col 1:string, col 3:double]
functionNames: [rank, dense_rank, sum]
keyInputColumns: [1, 0]
native: true
nonKeyInputColumns: [2, 3]
orderExpressions: [col 1:string]
outputColumns: [4, 5, 6, 1, 0, 2, 3]
outputTypes: [int, int, double, string, string, int, double]
partitionExpressions: [col 0:string]
streamingColumns: [4, 5, 6]
Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), round(sum_window_2, 2) (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
Select Vectorization:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [0, 1, 2, 4, 5, 7]
selectExpressions: RoundWithNumDigitsDoubleToDouble(col 6, decimalPlaces 2) -> 7:double
Statistics: Num rows: 26 Data size: 6214 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
Statistics: Num rows: 26 Data size: 6214 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
Fetch Operator
limit: -1
Processor Tree:
ListSink
PREHOOK: query: select p_mfgr, p_name, p_size,
rank() over (partition by p_mfgr order by p_name) as r,
dense_rank() over (partition by p_mfgr order by p_name) as dr,
round(sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row),2) as s1
from noop(on noopwithmap(on noop(on part_orc
partition by p_mfgr
order by p_mfgr, p_name
)))
PREHOOK: type: QUERY
PREHOOK: Input: default@part_orc
#### A masked pattern was here ####
POSTHOOK: query: select p_mfgr, p_name, p_size,
rank() over (partition by p_mfgr order by p_name) as r,
dense_rank() over (partition by p_mfgr order by p_name) as dr,
round(sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row),2) as s1
from noop(on noopwithmap(on noop(on part_orc
partition by p_mfgr
order by p_mfgr, p_name
)))
POSTHOOK: type: QUERY
POSTHOOK: Input: default@part_orc
#### A masked pattern was here ####
Manufacturer#1 almond antique burnished rose metallic 2 1 1 1173.15
Manufacturer#1 almond antique burnished rose metallic 2 1 1 2346.3
Manufacturer#1 almond antique chartreuse lavender yellow 34 3 2 4100.06
Manufacturer#1 almond antique salmon chartreuse burlywood 6 4 3 5702.65
Manufacturer#1 almond aquamarine burnished black steel 28 5 4 7117.07
Manufacturer#1 almond aquamarine pink moccasin thistle 42 6 5 8749.73
Manufacturer#2 almond antique violet chocolate turquoise 14 1 1 1690.68
Manufacturer#2 almond antique violet turquoise frosted 40 2 2 3491.38
Manufacturer#2 almond aquamarine midnight light salmon 2 3 3 5523.36
Manufacturer#2 almond aquamarine rose maroon antique 25 4 4 7222.02
Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 5 5 8923.62
Manufacturer#3 almond antique chartreuse khaki white 17 1 1 1671.68
Manufacturer#3 almond antique forest lavender goldenrod 14 2 2 2861.95
Manufacturer#3 almond antique metallic orange dim 19 3 3 4272.34
Manufacturer#3 almond antique misty red olive 1 4 4 6195.32
Manufacturer#3 almond antique olive coral navajo 45 5 5 7532.61
Manufacturer#4 almond antique gainsboro frosted violet 10 1 1 1620.67
Manufacturer#4 almond antique violet mint lemon 39 2 2 2996.09
Manufacturer#4 almond aquamarine floral ivory bisque 27 3 3 4202.35
Manufacturer#4 almond aquamarine yellow dodger mint 7 4 4 6047.27
Manufacturer#4 almond azure aquamarine papaya violet 12 5 5 7337.62
Manufacturer#5 almond antique blue firebrick mint 31 1 1 1789.69
Manufacturer#5 almond antique medium spring khaki 6 2 2 3401.35
Manufacturer#5 almond antique sky peru orange 2 3 3 5190.08
Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 6208.18
Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 7672.66
PREHOOK: query: explain vectorization detail
select p_mfgr, p_name,
sub1.cd, sub1.s1
from (select p_mfgr, p_name,
count(p_size) over (partition by p_mfgr order by p_name) as cd,
p_retailprice,
round(sum(p_retailprice) over w1,2) as s1
from noop(on part_orc
partition by p_mfgr
order by p_name)
window w1 as (partition by p_mfgr order by p_name rows between 2 preceding and 2 following)
) sub1
PREHOOK: type: QUERY
PREHOOK: Input: default@part_orc
#### A masked pattern was here ####
POSTHOOK: query: explain vectorization detail
select p_mfgr, p_name,
sub1.cd, sub1.s1
from (select p_mfgr, p_name,
count(p_size) over (partition by p_mfgr order by p_name) as cd,
p_retailprice,
round(sum(p_retailprice) over w1,2) as s1
from noop(on part_orc
partition by p_mfgr
order by p_name)
window w1 as (partition by p_mfgr order by p_name rows between 2 preceding and 2 following)
) sub1
POSTHOOK: type: QUERY
POSTHOOK: Input: default@part_orc
#### A masked pattern was here ####
PLAN VECTORIZATION:
enabled: true
enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
STAGE PLANS:
Stage: Stage-1
Tez
#### A masked pattern was here ####
Edges:
Reducer 2 <- Map 1 (SIMPLE_EDGE)
Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
TableScan
alias: part_orc
Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE
TableScan Vectorization:
native: true
vectorizationSchemaColumns: [0:p_partkey:int, 1:p_name:string, 2:p_mfgr:string, 3:p_brand:string, 4:p_type:string, 5:p_size:int, 6:p_container:string, 7:p_retailprice:double, 8:p_comment:string, 9:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>, 10:ROW__IS__DELETED:boolean]
Reduce Output Operator
key expressions: p_mfgr (type: string), p_name (type: string)
null sort order: az
sort order: ++
Map-reduce partition columns: p_mfgr (type: string)
Reduce Sink Vectorization:
className: VectorReduceSinkObjectHashOperator
keyColumns: 2:string, 1:string
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
partitionColumns: 2:string
valueColumns: 5:int, 7:double
Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: p_size (type: int), p_retailprice (type: double)
Execution mode: vectorized, llap
LLAP IO: all inputs
Map Vectorization:
enabled: true
enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
inputFormatFeatureSupport: [DECIMAL_64]
featureSupportInUse: [DECIMAL_64]
inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
allNative: true
usesVectorUDFAdaptor: false
vectorized: true
rowBatchContext:
dataColumnCount: 9
includeColumns: [1, 2, 5, 7]
dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string
partitionColumnCount: 0
scratchColumnTypeNames: []
Reducer 2
Execution mode: llap
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez] IS true
notVectorizedReason: PTF operator: NOOP not supported
vectorized: false
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double)
outputColumnNames: _col1, _col2, _col5, _col7
Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE
PTF Operator
Function definitions:
Input definition
input alias: part_orc
output shape: _col1: string, _col2: string, _col5: int, _col7: double
type: TABLE
Partition table definition
input alias: ptf_1
name: noop
order by: _col1 ASC NULLS LAST
output shape: _col1: string, _col2: string, _col5: int, _col7: double
partition by: _col2
raw input shape:
Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col2 (type: string), _col1 (type: string)
null sort order: az
sort order: ++
Map-reduce partition columns: _col2 (type: string)
Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col5 (type: int), _col7 (type: double)
Reducer 3
Execution mode: llap
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez] IS true
notVectorizedReason: PTF operator: sum UNBOUNDED end frame is required for ROWS window type
vectorized: false
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double)
outputColumnNames: _col1, _col2, _col5, _col7
Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE
PTF Operator
Function definitions:
Input definition
input alias: ptf_0
output shape: _col1: string, _col2: string, _col5: int, _col7: double
type: WINDOWING
Windowing table definition
input alias: ptf_1
name: windowingtablefunction
order by: _col1 ASC NULLS LAST
partition by: _col2
raw input shape:
window functions:
window function definition
alias: count_window_0
arguments: _col5
name: count
window function: GenericUDAFCountEvaluator
window frame: RANGE PRECEDING(MAX)~CURRENT
window function definition
alias: sum_window_1
arguments: _col7
name: sum
window function: GenericUDAFSumDouble
window frame: ROWS PRECEDING(2)~FOLLOWING(2)
Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col2 (type: string), _col1 (type: string), count_window_0 (type: bigint), round(sum_window_1, 2) (type: double)
outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 26 Data size: 6110 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
Statistics: Num rows: 26 Data size: 6110 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
Fetch Operator
limit: -1
Processor Tree:
ListSink
PREHOOK: query: select p_mfgr, p_name,
sub1.cd, sub1.s1
from (select p_mfgr, p_name,
count(p_size) over (partition by p_mfgr order by p_name) as cd,
p_retailprice,
round(sum(p_retailprice) over w1,2) as s1
from noop(on part_orc
partition by p_mfgr
order by p_name)
window w1 as (partition by p_mfgr order by p_name rows between 2 preceding and 2 following)
) sub1
PREHOOK: type: QUERY
PREHOOK: Input: default@part_orc
#### A masked pattern was here ####
POSTHOOK: query: select p_mfgr, p_name,
sub1.cd, sub1.s1
from (select p_mfgr, p_name,
count(p_size) over (partition by p_mfgr order by p_name) as cd,
p_retailprice,
round(sum(p_retailprice) over w1,2) as s1
from noop(on part_orc
partition by p_mfgr
order by p_name)
window w1 as (partition by p_mfgr order by p_name rows between 2 preceding and 2 following)
) sub1
POSTHOOK: type: QUERY
POSTHOOK: Input: default@part_orc
#### A masked pattern was here ####
Manufacturer#1 almond antique burnished rose metallic 2 4100.06
Manufacturer#1 almond antique burnished rose metallic 2 5702.65
Manufacturer#1 almond antique chartreuse lavender yellow 3 7117.07
Manufacturer#1 almond antique salmon chartreuse burlywood 4 7576.58
Manufacturer#1 almond aquamarine burnished black steel 5 6403.43
Manufacturer#1 almond aquamarine pink moccasin thistle 6 4649.67
Manufacturer#2 almond antique violet chocolate turquoise 1 5523.36
Manufacturer#2 almond antique violet turquoise frosted 2 7222.02
Manufacturer#2 almond aquamarine midnight light salmon 3 8923.62
Manufacturer#2 almond aquamarine rose maroon antique 4 7232.94
Manufacturer#2 almond aquamarine sandy cyan gainsboro 5 5432.24
Manufacturer#3 almond antique chartreuse khaki white 1 4272.34
Manufacturer#3 almond antique forest lavender goldenrod 2 6195.32
Manufacturer#3 almond antique metallic orange dim 3 7532.61
Manufacturer#3 almond antique misty red olive 4 5860.93
Manufacturer#3 almond antique olive coral navajo 5 4670.66
Manufacturer#4 almond antique gainsboro frosted violet 1 4202.35
Manufacturer#4 almond antique violet mint lemon 2 6047.27
Manufacturer#4 almond aquamarine floral ivory bisque 3 7337.62
Manufacturer#4 almond aquamarine yellow dodger mint 4 5716.95
Manufacturer#4 almond azure aquamarine papaya violet 5 4341.53
Manufacturer#5 almond antique blue firebrick mint 1 5190.08
Manufacturer#5 almond antique medium spring khaki 2 6208.18
Manufacturer#5 almond antique sky peru orange 3 7672.66
Manufacturer#5 almond aquamarine dodger light gainsboro 4 5882.97
Manufacturer#5 almond azure blanched chiffon midnight 5 4271.31
PREHOOK: query: explain vectorization detail
select abc.p_mfgr, abc.p_name,
rank() over (distribute by abc.p_mfgr sort by abc.p_name) as r,
dense_rank() over (distribute by abc.p_mfgr sort by abc.p_name) as dr,
count(abc.p_name) over (distribute by abc.p_mfgr sort by abc.p_name) as cd,
abc.p_retailprice, round(sum(abc.p_retailprice) over (distribute by abc.p_mfgr sort by abc.p_name rows between unbounded preceding and current row),2) as s1,
abc.p_size, abc.p_size - lag(abc.p_size,1,abc.p_size) over (distribute by abc.p_mfgr sort by abc.p_name) as deltaSz
from noop(on part_orc
partition by p_mfgr
order by p_name
) abc join part_orc p1 on abc.p_partkey = p1.p_partkey
PREHOOK: type: QUERY
PREHOOK: Input: default@part_orc
#### A masked pattern was here ####
POSTHOOK: query: explain vectorization detail
select abc.p_mfgr, abc.p_name,
rank() over (distribute by abc.p_mfgr sort by abc.p_name) as r,
dense_rank() over (distribute by abc.p_mfgr sort by abc.p_name) as dr,
count(abc.p_name) over (distribute by abc.p_mfgr sort by abc.p_name) as cd,
abc.p_retailprice, round(sum(abc.p_retailprice) over (distribute by abc.p_mfgr sort by abc.p_name rows between unbounded preceding and current row),2) as s1,
abc.p_size, abc.p_size - lag(abc.p_size,1,abc.p_size) over (distribute by abc.p_mfgr sort by abc.p_name) as deltaSz
from noop(on part_orc
partition by p_mfgr
order by p_name
) abc join part_orc p1 on abc.p_partkey = p1.p_partkey
POSTHOOK: type: QUERY
POSTHOOK: Input: default@part_orc
#### A masked pattern was here ####
PLAN VECTORIZATION:
enabled: true
enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
STAGE PLANS:
Stage: Stage-1
Tez
#### A masked pattern was here ####
Edges:
Reducer 2 <- Map 1 (SIMPLE_EDGE)
Reducer 3 <- Map 1 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE)
Reducer 4 <- Reducer 3 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
TableScan
alias: part_orc
Statistics: Num rows: 26 Data size: 6110 Basic stats: COMPLETE Column stats: COMPLETE
TableScan Vectorization:
native: true
vectorizationSchemaColumns: [0:p_partkey:int, 1:p_name:string, 2:p_mfgr:string, 3:p_brand:string, 4:p_type:string, 5:p_size:int, 6:p_container:string, 7:p_retailprice:double, 8:p_comment:string, 9:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>, 10:ROW__IS__DELETED:boolean]
Reduce Output Operator
key expressions: p_mfgr (type: string), p_name (type: string)
null sort order: az
sort order: ++
Map-reduce partition columns: p_mfgr (type: string)
Reduce Sink Vectorization:
className: VectorReduceSinkObjectHashOperator
keyColumns: 2:string, 1:string
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
partitionColumns: 2:string
valueColumns: 0:int, 5:int, 7:double
Statistics: Num rows: 26 Data size: 6110 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: p_partkey (type: int), p_size (type: int), p_retailprice (type: double)
Filter Operator
Filter Vectorization:
className: VectorFilterOperator
native: true
predicateExpression: SelectColumnIsNotNull(col 0:int)
predicate: p_partkey is not null (type: boolean)
Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: p_partkey (type: int)
null sort order: z
sort order: +
Map-reduce partition columns: p_partkey (type: int)
Reduce Sink Vectorization:
className: VectorReduceSinkLongOperator
keyColumns: 0:int
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE
Execution mode: vectorized, llap
LLAP IO: all inputs
Map Vectorization:
enabled: true
enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
inputFormatFeatureSupport: [DECIMAL_64]
featureSupportInUse: [DECIMAL_64]
inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
allNative: true
usesVectorUDFAdaptor: false
vectorized: true
rowBatchContext:
dataColumnCount: 9
includeColumns: [0, 1, 2, 5, 7]
dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string
partitionColumnCount: 0
scratchColumnTypeNames: []
Reducer 2
Execution mode: llap
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez] IS true
notVectorizedReason: PTF operator: NOOP not supported
vectorized: false
Reduce Operator Tree:
Select Operator
expressions: VALUE._col0 (type: int), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double)
outputColumnNames: _col0, _col1, _col2, _col5, _col7
Statistics: Num rows: 26 Data size: 6110 Basic stats: COMPLETE Column stats: COMPLETE
PTF Operator
Function definitions:
Input definition
input alias: part_orc
output shape: _col0: int, _col1: string, _col2: string, _col5: int, _col7: double
type: TABLE
Partition table definition
input alias: abc
name: noop
order by: _col1 ASC NULLS LAST
output shape: _col0: int, _col1: string, _col2: string, _col5: int, _col7: double
partition by: _col2
raw input shape:
Statistics: Num rows: 26 Data size: 6110 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: _col0 is not null (type: boolean)
Statistics: Num rows: 26 Data size: 6110 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int)
null sort order: z
sort order: +
Map-reduce partition columns: _col0 (type: int)
Statistics: Num rows: 26 Data size: 6110 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double)
Reducer 3
Execution mode: llap
Reduce Operator Tree:
Merge Join Operator
condition map:
Inner Join 0 to 1
keys:
0 _col0 (type: int)
1 p_partkey (type: int)
outputColumnNames: _col1, _col2, _col5, _col7
Statistics: Num rows: 27 Data size: 6237 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col2 (type: string), _col1 (type: string)
null sort order: az
sort order: ++
Map-reduce partition columns: _col2 (type: string)
Statistics: Num rows: 27 Data size: 6237 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col5 (type: int), _col7 (type: double)
MergeJoin Vectorization:
enabled: false
enableConditionsNotMet: Vectorizing MergeJoin Supported IS false
Reducer 4
Execution mode: vectorized, llap
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez] IS true
reduceColumnNullOrder: az
reduceColumnSortOrder: ++
allNative: false
usesVectorUDFAdaptor: false
vectorized: true
rowBatchContext:
dataColumnCount: 4
dataColumns: KEY.reducesinkkey0:string, KEY.reducesinkkey1:string, VALUE._col3:int, VALUE._col5:double
partitionColumnCount: 0
scratchColumnTypeNames: [bigint, bigint, bigint, double, bigint, bigint, double, bigint]
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double)
outputColumnNames: _col1, _col2, _col5, _col7
Select Vectorization:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [1, 0, 2, 3]
Statistics: Num rows: 27 Data size: 6237 Basic stats: COMPLETE Column stats: COMPLETE
PTF Operator
Function definitions:
Input definition
input alias: ptf_0
output shape: _col1: string, _col2: string, _col5: int, _col7: double
type: WINDOWING
Windowing table definition
input alias: ptf_1
name: windowingtablefunction
order by: _col1 ASC NULLS LAST
partition by: _col2
raw input shape:
window functions:
window function definition
alias: rank_window_0
arguments: _col1
name: rank
window function: GenericUDAFRankEvaluator
window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
alias: dense_rank_window_1
arguments: _col1
name: dense_rank
window function: GenericUDAFDenseRankEvaluator
window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
alias: count_window_2
arguments: _col1
name: count
window function: GenericUDAFCountEvaluator
window frame: RANGE PRECEDING(MAX)~CURRENT
window function definition
alias: sum_window_3
arguments: _col7
name: sum
window function: GenericUDAFSumDouble
window frame: ROWS PRECEDING(MAX)~CURRENT
window function definition
alias: lag_window_4
arguments: _col5, 1, _col5
name: lag
window function: GenericUDAFLagEvaluator
window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
PTF Vectorization:
allEvaluatorsAreStreaming: false
className: VectorPTFOperator
evaluatorClasses: [VectorPTFEvaluatorRank, VectorPTFEvaluatorDenseRank, VectorPTFEvaluatorCount, VectorPTFEvaluatorStreamingDoubleSum, VectorPTFEvaluatorLag]
functionInputExpressions: [col 1:string, col 1:string, col 1:string, col 3:double, col 2:int]
functionNames: [rank, dense_rank, count, sum, lag]
keyInputColumns: [1, 0]
native: true
nonKeyInputColumns: [2, 3]
orderExpressions: [col 1:string]
outputColumns: [4, 5, 6, 7, 8, 1, 0, 2, 3]
outputTypes: [int, int, bigint, double, int, string, string, int, double]
partitionExpressions: [col 0:string]
streamingColumns: [4, 5, 7]
Statistics: Num rows: 27 Data size: 6237 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), count_window_2 (type: bigint), _col7 (type: double), round(sum_window_3, 2) (type: double), _col5 (type: int), (_col5 - lag_window_4) (type: int)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
Select Vectorization:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [0, 1, 4, 5, 6, 3, 10, 2, 11]
selectExpressions: RoundWithNumDigitsDoubleToDouble(col 7, decimalPlaces 2) -> 10:double, LongColSubtractLongColumn(col 2:int, col 8:int) -> 11:int
Statistics: Num rows: 27 Data size: 6993 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
Statistics: Num rows: 27 Data size: 6993 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
Fetch Operator
limit: -1
Processor Tree:
ListSink
PREHOOK: query: select abc.p_mfgr, abc.p_name,
rank() over (distribute by abc.p_mfgr sort by abc.p_name) as r,
dense_rank() over (distribute by abc.p_mfgr sort by abc.p_name) as dr,
count(abc.p_name) over (distribute by abc.p_mfgr sort by abc.p_name) as cd,
abc.p_retailprice, round(sum(abc.p_retailprice) over (distribute by abc.p_mfgr sort by abc.p_name rows between unbounded preceding and current row),2) as s1,
abc.p_size, abc.p_size - lag(abc.p_size,1,abc.p_size) over (distribute by abc.p_mfgr sort by abc.p_name) as deltaSz
from noop(on part_orc
partition by p_mfgr
order by p_name
) abc join part_orc p1 on abc.p_partkey = p1.p_partkey
PREHOOK: type: QUERY
PREHOOK: Input: default@part_orc
#### A masked pattern was here ####
POSTHOOK: query: select abc.p_mfgr, abc.p_name,
rank() over (distribute by abc.p_mfgr sort by abc.p_name) as r,
dense_rank() over (distribute by abc.p_mfgr sort by abc.p_name) as dr,
count(abc.p_name) over (distribute by abc.p_mfgr sort by abc.p_name) as cd,
abc.p_retailprice, round(sum(abc.p_retailprice) over (distribute by abc.p_mfgr sort by abc.p_name rows between unbounded preceding and current row),2) as s1,
abc.p_size, abc.p_size - lag(abc.p_size,1,abc.p_size) over (distribute by abc.p_mfgr sort by abc.p_name) as deltaSz
from noop(on part_orc
partition by p_mfgr
order by p_name
) abc join part_orc p1 on abc.p_partkey = p1.p_partkey
POSTHOOK: type: QUERY
POSTHOOK: Input: default@part_orc
#### A masked pattern was here ####
Manufacturer#1 almond antique burnished rose metallic 1 1 4 1173.15 1173.15 2 0
Manufacturer#1 almond antique burnished rose metallic 1 1 4 1173.15 2346.3 2 0
Manufacturer#1 almond antique burnished rose metallic 1 1 4 1173.15 3519.45 2 0
Manufacturer#1 almond antique burnished rose metallic 1 1 4 1173.15 4692.6 2 0
Manufacturer#1 almond antique chartreuse lavender yellow 5 2 5 1753.76 6446.36 34 32
Manufacturer#1 almond antique salmon chartreuse burlywood 6 3 6 1602.59 8048.95 6 -28
Manufacturer#1 almond aquamarine burnished black steel 7 4 7 1414.42 9463.37 28 22
Manufacturer#1 almond aquamarine pink moccasin thistle 8 5 8 1632.66 11096.03 42 14
Manufacturer#2 almond antique violet chocolate turquoise 1 1 1 1690.68 1690.68 14 0
Manufacturer#2 almond antique violet turquoise frosted 2 2 2 1800.7 3491.38 40 26
Manufacturer#2 almond aquamarine midnight light salmon 3 3 3 2031.98 5523.36 2 -38
Manufacturer#2 almond aquamarine rose maroon antique 4 4 4 1698.66 7222.02 25 23
Manufacturer#2 almond aquamarine sandy cyan gainsboro 5 5 5 1701.6 8923.62 18 -7
Manufacturer#3 almond antique chartreuse khaki white 1 1 1 1671.68 1671.68 17 0
Manufacturer#3 almond antique forest lavender goldenrod 2 2 2 1190.27 2861.95 14 -3
Manufacturer#3 almond antique metallic orange dim 3 3 3 1410.39 4272.34 19 5
Manufacturer#3 almond antique misty red olive 4 4 4 1922.98 6195.32 1 -18
Manufacturer#3 almond antique olive coral navajo 5 5 5 1337.29 7532.61 45 44
Manufacturer#4 almond antique gainsboro frosted violet 1 1 1 1620.67 1620.67 10 0
Manufacturer#4 almond antique violet mint lemon 2 2 2 1375.42 2996.09 39 29
Manufacturer#4 almond aquamarine floral ivory bisque 3 3 3 1206.26 4202.35 27 -12
Manufacturer#4 almond aquamarine yellow dodger mint 4 4 4 1844.92 6047.27 7 -20
Manufacturer#4 almond azure aquamarine papaya violet 5 5 5 1290.35 7337.62 12 5
Manufacturer#5 almond antique blue firebrick mint 1 1 1 1789.69 1789.69 31 0
Manufacturer#5 almond antique medium spring khaki 2 2 2 1611.66 3401.35 6 -25
Manufacturer#5 almond antique sky peru orange 3 3 3 1788.73 5190.08 2 -4
Manufacturer#5 almond aquamarine dodger light gainsboro 4 4 4 1018.1 6208.18 46 44
Manufacturer#5 almond azure blanched chiffon midnight 5 5 5 1464.48 7672.66 23 -23
PREHOOK: query: explain vectorization detail
select DISTINCT p_mfgr, p_name, p_size
from noop(on part_orc
partition by p_mfgr
order by p_name)
PREHOOK: type: QUERY
PREHOOK: Input: default@part_orc
#### A masked pattern was here ####
POSTHOOK: query: explain vectorization detail
select DISTINCT p_mfgr, p_name, p_size
from noop(on part_orc
partition by p_mfgr
order by p_name)
POSTHOOK: type: QUERY
POSTHOOK: Input: default@part_orc
#### A masked pattern was here ####
PLAN VECTORIZATION:
enabled: true
enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
STAGE PLANS:
Stage: Stage-1
Tez
#### A masked pattern was here ####
Edges:
Reducer 2 <- Map 1 (SIMPLE_EDGE)
Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
TableScan
alias: part_orc
Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
TableScan Vectorization:
native: true
vectorizationSchemaColumns: [0:p_partkey:int, 1:p_name:string, 2:p_mfgr:string, 3:p_brand:string, 4:p_type:string, 5:p_size:int, 6:p_container:string, 7:p_retailprice:double, 8:p_comment:string, 9:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>, 10:ROW__IS__DELETED:boolean]
Reduce Output Operator
key expressions: p_mfgr (type: string), p_name (type: string)
null sort order: az
sort order: ++
Map-reduce partition columns: p_mfgr (type: string)
Reduce Sink Vectorization:
className: VectorReduceSinkObjectHashOperator
keyColumns: 2:string, 1:string
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
partitionColumns: 2:string
valueColumns: 5:int
Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: p_size (type: int)
Execution mode: vectorized, llap
LLAP IO: all inputs
Map Vectorization:
enabled: true
enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
inputFormatFeatureSupport: [DECIMAL_64]
featureSupportInUse: [DECIMAL_64]
inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
allNative: true
usesVectorUDFAdaptor: false
vectorized: true
rowBatchContext:
dataColumnCount: 9
includeColumns: [1, 2, 5]
dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string
partitionColumnCount: 0
scratchColumnTypeNames: []
Reducer 2
Execution mode: llap
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez] IS true
notVectorizedReason: PTF operator: NOOP not supported
vectorized: false
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int)
outputColumnNames: _col1, _col2, _col5
Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
PTF Operator
Function definitions:
Input definition
input alias: part_orc
output shape: _col1: string, _col2: string, _col5: int
type: TABLE
Partition table definition
input alias: ptf_1
name: noop
order by: _col1 ASC NULLS LAST
output shape: _col1: string, _col2: string, _col5: int
partition by: _col2
raw input shape:
Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int)
outputColumnNames: _col1, _col2, _col5
Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
keys: _col2 (type: string), _col1 (type: string), _col5 (type: int)
minReductionHashAggr: 0.4
mode: hash
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 25 Data size: 5575 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int)
null sort order: zzz
sort order: +++
Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int)
Statistics: Num rows: 25 Data size: 5575 Basic stats: COMPLETE Column stats: COMPLETE
Reducer 3
Execution mode: vectorized, llap
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez] IS true
reduceColumnNullOrder: zzz
reduceColumnSortOrder: +++
allNative: false
usesVectorUDFAdaptor: false
vectorized: true
rowBatchContext:
dataColumnCount: 3
dataColumns: KEY._col0:string, KEY._col1:string, KEY._col2:int
partitionColumnCount: 0
scratchColumnTypeNames: []
Reduce Operator Tree:
Group By Operator
Group By Vectorization:
className: VectorGroupByOperator
groupByMode: MERGEPARTIAL
keyExpressions: col 0:string, col 1:string, col 2:int
native: false
vectorProcessingMode: MERGE_PARTIAL
projectedOutputColumnNums: []
keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 25 Data size: 5575 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
Statistics: Num rows: 25 Data size: 5575 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
Fetch Operator
limit: -1
Processor Tree:
ListSink
PREHOOK: query: select DISTINCT p_mfgr, p_name, p_size
from noop(on part_orc
partition by p_mfgr
order by p_name)
PREHOOK: type: QUERY
PREHOOK: Input: default@part_orc
#### A masked pattern was here ####
POSTHOOK: query: select DISTINCT p_mfgr, p_name, p_size
from noop(on part_orc
partition by p_mfgr
order by p_name)
POSTHOOK: type: QUERY
POSTHOOK: Input: default@part_orc
#### A masked pattern was here ####
Manufacturer#1 almond antique burnished rose metallic 2
Manufacturer#1 almond antique chartreuse lavender yellow 34
Manufacturer#1 almond antique salmon chartreuse burlywood 6
Manufacturer#1 almond aquamarine burnished black steel 28
Manufacturer#1 almond aquamarine pink moccasin thistle 42
Manufacturer#2 almond antique violet chocolate turquoise 14
Manufacturer#2 almond antique violet turquoise frosted 40
Manufacturer#2 almond aquamarine midnight light salmon 2
Manufacturer#2 almond aquamarine rose maroon antique 25
Manufacturer#2 almond aquamarine sandy cyan gainsboro 18
Manufacturer#3 almond antique chartreuse khaki white 17
Manufacturer#3 almond antique forest lavender goldenrod 14
Manufacturer#3 almond antique metallic orange dim 19
Manufacturer#3 almond antique misty red olive 1
Manufacturer#3 almond antique olive coral navajo 45
Manufacturer#4 almond antique gainsboro frosted violet 10
Manufacturer#4 almond antique violet mint lemon 39
Manufacturer#4 almond aquamarine floral ivory bisque 27
Manufacturer#4 almond aquamarine yellow dodger mint 7
Manufacturer#4 almond azure aquamarine papaya violet 12
Manufacturer#5 almond antique blue firebrick mint 31
Manufacturer#5 almond antique medium spring khaki 6
Manufacturer#5 almond antique sky peru orange 2
Manufacturer#5 almond aquamarine dodger light gainsboro 46
Manufacturer#5 almond azure blanched chiffon midnight 23
PREHOOK: query: create view IF NOT EXISTS mfgr_price_view_n0 as
select p_mfgr, p_brand,
round(sum(p_retailprice),2) as s
from part_orc
group by p_mfgr, p_brand
PREHOOK: type: CREATEVIEW
PREHOOK: Input: default@part_orc
PREHOOK: Output: database:default
PREHOOK: Output: default@mfgr_price_view_n0
POSTHOOK: query: create view IF NOT EXISTS mfgr_price_view_n0 as
select p_mfgr, p_brand,
round(sum(p_retailprice),2) as s
from part_orc
group by p_mfgr, p_brand
POSTHOOK: type: CREATEVIEW
POSTHOOK: Input: default@part_orc
POSTHOOK: Output: database:default
POSTHOOK: Output: default@mfgr_price_view_n0
POSTHOOK: Lineage: mfgr_price_view_n0.p_brand SIMPLE [(part_orc)part_orc.FieldSchema(name:p_brand, type:string, comment:null), ]
POSTHOOK: Lineage: mfgr_price_view_n0.p_mfgr SIMPLE [(part_orc)part_orc.FieldSchema(name:p_mfgr, type:string, comment:null), ]
POSTHOOK: Lineage: mfgr_price_view_n0.s EXPRESSION [(part_orc)part_orc.FieldSchema(name:p_retailprice, type:double, comment:null), ]
PREHOOK: query: explain vectorization detail
select p_mfgr, p_brand, s,
round(sum(s) over w1,2) as s1
from noop(on mfgr_price_view_n0
partition by p_mfgr
order by p_mfgr)
window w1 as ( partition by p_mfgr order by p_brand rows between 2 preceding and current row)
PREHOOK: type: QUERY
PREHOOK: Input: default@mfgr_price_view_n0
PREHOOK: Input: default@part_orc
#### A masked pattern was here ####
POSTHOOK: query: explain vectorization detail
select p_mfgr, p_brand, s,
round(sum(s) over w1,2) as s1
from noop(on mfgr_price_view_n0
partition by p_mfgr
order by p_mfgr)
window w1 as ( partition by p_mfgr order by p_brand rows between 2 preceding and current row)
POSTHOOK: type: QUERY
POSTHOOK: Input: default@mfgr_price_view_n0
POSTHOOK: Input: default@part_orc
#### A masked pattern was here ####
PLAN VECTORIZATION:
enabled: true
enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
STAGE PLANS:
Stage: Stage-1
Tez
#### A masked pattern was here ####
Edges:
Reducer 2 <- Map 1 (SIMPLE_EDGE)
Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
TableScan
alias: part_orc
Statistics: Num rows: 26 Data size: 5148 Basic stats: COMPLETE Column stats: COMPLETE
TableScan Vectorization:
native: true
vectorizationSchemaColumns: [0:p_partkey:int, 1:p_name:string, 2:p_mfgr:string, 3:p_brand:string, 4:p_type:string, 5:p_size:int, 6:p_container:string, 7:p_retailprice:double, 8:p_comment:string, 9:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>, 10:ROW__IS__DELETED:boolean]
Select Operator
expressions: p_mfgr (type: string), p_brand (type: string), p_retailprice (type: double)
outputColumnNames: p_mfgr, p_brand, p_retailprice
Select Vectorization:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [2, 3, 7]
Statistics: Num rows: 26 Data size: 5148 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: sum(p_retailprice)
Group By Vectorization:
aggregators: VectorUDAFSumDouble(col 7:double) -> double
className: VectorGroupByOperator
groupByMode: HASH
keyExpressions: col 2:string, col 3:string
native: false
vectorProcessingMode: HASH
projectedOutputColumnNums: [0]
keys: p_mfgr (type: string), p_brand (type: string)
minReductionHashAggr: 0.4
mode: hash
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 16 Data size: 3168 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: string)
null sort order: zz
sort order: ++
Map-reduce partition columns: _col0 (type: string)
Reduce Sink Vectorization:
className: VectorReduceSinkObjectHashOperator
keyColumns: 0:string, 1:string
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
partitionColumns: 0:string
valueColumns: 2:double
Statistics: Num rows: 16 Data size: 3168 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col2 (type: double)
Execution mode: vectorized, llap
LLAP IO: all inputs
Map Vectorization:
enabled: true
enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
inputFormatFeatureSupport: [DECIMAL_64]
featureSupportInUse: [DECIMAL_64]
inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
allNative: false
usesVectorUDFAdaptor: false
vectorized: true
rowBatchContext:
dataColumnCount: 9
includeColumns: [2, 3, 7]
dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string
partitionColumnCount: 0
scratchColumnTypeNames: []
Reducer 2
Execution mode: llap
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez] IS true
notVectorizedReason: PTF operator: Only PTF directly under reduce-shuffle is supported
vectorized: false
Reduce Operator Tree:
Group By Operator
aggregations: sum(VALUE._col0)
keys: KEY._col0 (type: string), KEY._col1 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 16 Data size: 3168 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col0 (type: string), _col1 (type: string), round(_col2, 2) (type: double)
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 16 Data size: 3168 Basic stats: COMPLETE Column stats: COMPLETE
PTF Operator
Function definitions:
Input definition
input alias: mfgr_price_view_n0
output shape: _col0: string, _col1: string, _col2: double
type: TABLE
Partition table definition
input alias: ptf_1
name: noop
order by: _col0 ASC NULLS LAST
output shape: _col0: string, _col1: string, _col2: double
partition by: _col0
raw input shape:
Statistics: Num rows: 16 Data size: 3168 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: string)
null sort order: az
sort order: ++
Map-reduce partition columns: _col0 (type: string)
Statistics: Num rows: 16 Data size: 3168 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col2 (type: double)
Reducer 3
Execution mode: vectorized, llap
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez] IS true
reduceColumnNullOrder: az
reduceColumnSortOrder: ++
allNative: false
usesVectorUDFAdaptor: false
vectorized: true
rowBatchContext:
dataColumnCount: 3
dataColumns: KEY.reducesinkkey0:string, KEY.reducesinkkey1:string, VALUE._col0:double
partitionColumnCount: 0
scratchColumnTypeNames: [double, double]
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: double)
outputColumnNames: _col0, _col1, _col2
Select Vectorization:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [0, 1, 2]
Statistics: Num rows: 16 Data size: 3168 Basic stats: COMPLETE Column stats: COMPLETE
PTF Operator
Function definitions:
Input definition
input alias: ptf_0
output shape: _col0: string, _col1: string, _col2: double
type: WINDOWING
Windowing table definition
input alias: ptf_1
name: windowingtablefunction
order by: _col1 ASC NULLS LAST
partition by: _col0
raw input shape:
window functions:
window function definition
alias: sum_window_0
arguments: _col2
name: sum
window function: GenericUDAFSumDouble
window frame: ROWS PRECEDING(2)~CURRENT
PTF Vectorization:
allEvaluatorsAreStreaming: false
className: VectorPTFOperator
evaluatorClasses: [VectorPTFEvaluatorDoubleSum]
functionInputExpressions: [col 2:double]
functionNames: [sum]
keyInputColumns: [0, 1]
native: true
nonKeyInputColumns: [2]
orderExpressions: [col 1:string]
outputColumns: [3, 0, 1, 2]
outputTypes: [double, string, string, double]
partitionExpressions: [col 0:string]
streamingColumns: []
Statistics: Num rows: 16 Data size: 3168 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col0 (type: string), _col1 (type: string), _col2 (type: double), round(sum_window_0, 2) (type: double)
outputColumnNames: _col0, _col1, _col2, _col3
Select Vectorization:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [0, 1, 2, 4]
selectExpressions: RoundWithNumDigitsDoubleToDouble(col 3, decimalPlaces 2) -> 4:double
Statistics: Num rows: 16 Data size: 3296 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
Statistics: Num rows: 16 Data size: 3296 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
Fetch Operator
limit: -1
Processor Tree:
ListSink
PREHOOK: query: select p_mfgr, p_brand, s,
round(sum(s) over w1,2) as s1
from noop(on mfgr_price_view_n0
partition by p_mfgr
order by p_mfgr)
window w1 as ( partition by p_mfgr order by p_brand rows between 2 preceding and current row)
PREHOOK: type: QUERY
PREHOOK: Input: default@mfgr_price_view_n0
PREHOOK: Input: default@part_orc
#### A masked pattern was here ####
POSTHOOK: query: select p_mfgr, p_brand, s,
round(sum(s) over w1,2) as s1
from noop(on mfgr_price_view_n0
partition by p_mfgr
order by p_mfgr)
window w1 as ( partition by p_mfgr order by p_brand rows between 2 preceding and current row)
POSTHOOK: type: QUERY
POSTHOOK: Input: default@mfgr_price_view_n0
POSTHOOK: Input: default@part_orc
#### A masked pattern was here ####
Manufacturer#1 Brand#12 4800.84 4800.84
Manufacturer#1 Brand#14 2346.3 7147.14
Manufacturer#1 Brand#15 1602.59 8749.73
Manufacturer#2 Brand#22 3491.38 3491.38
Manufacturer#2 Brand#23 2031.98 5523.36
Manufacturer#2 Brand#24 1698.66 7222.02
Manufacturer#2 Brand#25 1701.6 5432.24
Manufacturer#3 Brand#31 1671.68 1671.68
Manufacturer#3 Brand#32 3333.37 5005.05
Manufacturer#3 Brand#34 1337.29 6342.34
Manufacturer#3 Brand#35 1190.27 5860.93
Manufacturer#4 Brand#41 4755.94 4755.94
Manufacturer#4 Brand#42 2581.68 7337.62
Manufacturer#5 Brand#51 1611.66 1611.66
Manufacturer#5 Brand#52 3254.17 4865.83
Manufacturer#5 Brand#53 2806.83 7672.66
PREHOOK: query: CREATE TABLE part_4(
p_mfgr STRING,
p_name STRING,
p_size INT,
r INT,
dr INT,
s DOUBLE)
PREHOOK: type: CREATETABLE
PREHOOK: Output: database:default
PREHOOK: Output: default@part_4
POSTHOOK: query: CREATE TABLE part_4(
p_mfgr STRING,
p_name STRING,
p_size INT,
r INT,
dr INT,
s DOUBLE)
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: database:default
POSTHOOK: Output: default@part_4
PREHOOK: query: CREATE TABLE part_5(
p_mfgr STRING,
p_name STRING,
p_size INT,
s2 INT,
r INT,
dr INT,
cud DOUBLE,
fv1 INT)
PREHOOK: type: CREATETABLE
PREHOOK: Output: database:default
PREHOOK: Output: default@part_5
POSTHOOK: query: CREATE TABLE part_5(
p_mfgr STRING,
p_name STRING,
p_size INT,
s2 INT,
r INT,
dr INT,
cud DOUBLE,
fv1 INT)
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: database:default
POSTHOOK: Output: default@part_5
PREHOOK: query: explain vectorization detail
from noop(on part_orc
partition by p_mfgr
order by p_name)
INSERT OVERWRITE TABLE part_4 select p_mfgr, p_name, p_size,
rank() over (distribute by p_mfgr sort by p_name) as r,
dense_rank() over (distribute by p_mfgr sort by p_name) as dr,
round(sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows between unbounded preceding and current row),2) as s
INSERT OVERWRITE TABLE part_5 select p_mfgr,p_name, p_size,
round(sum(p_size) over (distribute by p_mfgr sort by p_size range between 5 preceding and current row),1) as s2,
rank() over (distribute by p_mfgr sort by p_mfgr, p_name) as r,
dense_rank() over (distribute by p_mfgr sort by p_mfgr, p_name) as dr,
cume_dist() over (distribute by p_mfgr sort by p_mfgr, p_name) as cud,
first_value(p_size, true) over w1 as fv1
window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following)
PREHOOK: type: QUERY
PREHOOK: Input: default@part_orc
PREHOOK: Output: default@part_4
PREHOOK: Output: default@part_5
POSTHOOK: query: explain vectorization detail
from noop(on part_orc
partition by p_mfgr
order by p_name)
INSERT OVERWRITE TABLE part_4 select p_mfgr, p_name, p_size,
rank() over (distribute by p_mfgr sort by p_name) as r,
dense_rank() over (distribute by p_mfgr sort by p_name) as dr,
round(sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows between unbounded preceding and current row),2) as s
INSERT OVERWRITE TABLE part_5 select p_mfgr,p_name, p_size,
round(sum(p_size) over (distribute by p_mfgr sort by p_size range between 5 preceding and current row),1) as s2,
rank() over (distribute by p_mfgr sort by p_mfgr, p_name) as r,
dense_rank() over (distribute by p_mfgr sort by p_mfgr, p_name) as dr,
cume_dist() over (distribute by p_mfgr sort by p_mfgr, p_name) as cud,
first_value(p_size, true) over w1 as fv1
window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following)
POSTHOOK: type: QUERY
POSTHOOK: Input: default@part_orc
POSTHOOK: Output: default@part_4
POSTHOOK: Output: default@part_5
PLAN VECTORIZATION:
enabled: true
enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
STAGE DEPENDENCIES:
Stage-2 is a root stage
Stage-3 depends on stages: Stage-2
Stage-0 depends on stages: Stage-3
Stage-4 depends on stages: Stage-0
Stage-1 depends on stages: Stage-3
Stage-5 depends on stages: Stage-1
STAGE PLANS:
Stage: Stage-2
Tez
#### A masked pattern was here ####
Edges:
Reducer 2 <- Map 1 (SIMPLE_EDGE)
Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE)
Reducer 5 <- Reducer 2 (SIMPLE_EDGE)
Reducer 6 <- Reducer 5 (SIMPLE_EDGE)
Reducer 7 <- Reducer 6 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
TableScan
alias: part_orc
Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE
TableScan Vectorization:
native: true
vectorizationSchemaColumns: [0:p_partkey:int, 1:p_name:string, 2:p_mfgr:string, 3:p_brand:string, 4:p_type:string, 5:p_size:int, 6:p_container:string, 7:p_retailprice:double, 8:p_comment:string, 9:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>, 10:ROW__IS__DELETED:boolean]
Reduce Output Operator
key expressions: p_mfgr (type: string), p_name (type: string)
null sort order: az
sort order: ++
Map-reduce partition columns: p_mfgr (type: string)
Reduce Sink Vectorization:
className: VectorReduceSinkObjectHashOperator
keyColumns: 2:string, 1:string
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
partitionColumns: 2:string
valueColumns: 5:int, 7:double
Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: p_size (type: int), p_retailprice (type: double)
Execution mode: vectorized, llap
LLAP IO: all inputs
Map Vectorization:
enabled: true
enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
inputFormatFeatureSupport: [DECIMAL_64]
featureSupportInUse: [DECIMAL_64]
inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
allNative: true
usesVectorUDFAdaptor: false
vectorized: true
rowBatchContext:
dataColumnCount: 9
includeColumns: [1, 2, 5, 7]
dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string
partitionColumnCount: 0
scratchColumnTypeNames: []
Reducer 2
Execution mode: llap
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez] IS true
notVectorizedReason: PTF operator: NOOP not supported
vectorized: false
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double)
outputColumnNames: _col1, _col2, _col5, _col7
Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE
PTF Operator
Function definitions:
Input definition
input alias: part_orc
output shape: _col1: string, _col2: string, _col5: int, _col7: double
type: TABLE
Partition table definition
input alias: ptf_1
name: noop
order by: _col1 ASC NULLS LAST
output shape: _col1: string, _col2: string, _col5: int, _col7: double
partition by: _col2
raw input shape:
Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col2 (type: string), _col1 (type: string)
null sort order: az
sort order: ++
Map-reduce partition columns: _col2 (type: string)
Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col5 (type: int), _col7 (type: double)
Reduce Output Operator
key expressions: _col2 (type: string), _col5 (type: int)
null sort order: az
sort order: ++
Map-reduce partition columns: _col2 (type: string)
Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: string)
Reducer 3
Execution mode: vectorized, llap
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez] IS true
reduceColumnNullOrder: az
reduceColumnSortOrder: ++
allNative: false
usesVectorUDFAdaptor: false
vectorized: true
rowBatchContext:
dataColumnCount: 4
dataColumns: KEY.reducesinkkey0:string, KEY.reducesinkkey1:string, VALUE._col3:int, VALUE._col5:double
partitionColumnCount: 0
scratchColumnTypeNames: [bigint, bigint, double, double, bigint, bigint, bigint, bigint, bigint, bigint, bigint, bigint, bigint]
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double)
outputColumnNames: _col1, _col2, _col5, _col7
Select Vectorization:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [1, 0, 2, 3]
Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE
PTF Operator
Function definitions:
Input definition
input alias: ptf_0
output shape: _col1: string, _col2: string, _col5: int, _col7: double
type: WINDOWING
Windowing table definition
input alias: ptf_1
name: windowingtablefunction
order by: _col1 ASC NULLS LAST
partition by: _col2
raw input shape:
window functions:
window function definition
alias: rank_window_0
arguments: _col1
name: rank
window function: GenericUDAFRankEvaluator
window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
alias: dense_rank_window_1
arguments: _col1
name: dense_rank
window function: GenericUDAFDenseRankEvaluator
window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
alias: sum_window_2
arguments: _col7
name: sum
window function: GenericUDAFSumDouble
window frame: ROWS PRECEDING(MAX)~CURRENT
PTF Vectorization:
allEvaluatorsAreStreaming: true
className: VectorPTFOperator
evaluatorClasses: [VectorPTFEvaluatorRank, VectorPTFEvaluatorDenseRank, VectorPTFEvaluatorStreamingDoubleSum]
functionInputExpressions: [col 1:string, col 1:string, col 3:double]
functionNames: [rank, dense_rank, sum]
keyInputColumns: [1, 0]
native: true
nonKeyInputColumns: [2, 3]
orderExpressions: [col 1:string]
outputColumns: [4, 5, 6, 1, 0, 2, 3]
outputTypes: [int, int, double, string, string, int, double]
partitionExpressions: [col 0:string]
streamingColumns: [4, 5, 6]
Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), round(sum_window_2, 2) (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
Select Vectorization:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [0, 1, 2, 4, 5, 7]
selectExpressions: RoundWithNumDigitsDoubleToDouble(col 6, decimalPlaces 2) -> 7:double
Statistics: Num rows: 26 Data size: 6214 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
Statistics: Num rows: 26 Data size: 6214 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.part_4
Select Operator
expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: double)
outputColumnNames: p_mfgr, p_name, p_size, r, dr, s
Select Vectorization:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [0, 1, 2, 4, 5, 7]
Statistics: Num rows: 26 Data size: 6214 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: max(length(p_mfgr)), avg(COALESCE(length(p_mfgr),0)), count(1), count(p_mfgr), compute_bit_vector_hll(p_mfgr), max(length(p_name)), avg(COALESCE(length(p_name),0)), count(p_name), compute_bit_vector_hll(p_name), min(p_size), max(p_size), count(p_size), compute_bit_vector_hll(p_size), min(r), max(r), count(r), compute_bit_vector_hll(r), min(dr), max(dr), count(dr), compute_bit_vector_hll(dr), min(s), max(s), count(s), compute_bit_vector_hll(s)
Group By Vectorization:
aggregators: VectorUDAFMaxLong(StringLength(col 0:string) -> 8:int) -> int, VectorUDAFAvgLong(VectorCoalesce(columns [9, 10])(children: StringLength(col 0:string) -> 9:int, ConstantVectorExpression(val 0) -> 10:int) -> 11:int) -> struct<count:bigint,sum:double,input:int>, VectorUDAFCount(ConstantVectorExpression(val 1) -> 12:int) -> bigint, VectorUDAFCount(col 0:string) -> bigint, VectorUDAFComputeBitVectorString(col 0:string) -> binary, VectorUDAFMaxLong(StringLength(col 1:string) -> 13:int) -> int, VectorUDAFAvgLong(VectorCoalesce(columns [14, 15])(children: StringLength(col 1:string) -> 14:int, ConstantVectorExpression(val 0) -> 15:int) -> 16:int) -> struct<count:bigint,sum:double,input:int>, VectorUDAFCount(col 1:string) -> bigint, VectorUDAFComputeBitVectorString(col 1:string) -> binary, VectorUDAFMinLong(col 2:int) -> int, VectorUDAFMaxLong(col 2:int) -> int, VectorUDAFCount(col 2:int) -> bigint, VectorUDAFComputeBitVectorLong(col 2:int) -> binary, VectorUDAFMinLong(col 4:int) -> int, VectorUDAFMaxLong(col 4:int) -> int, VectorUDAFCount(col 4:int) -> bigint, VectorUDAFComputeBitVectorLong(col 4:int) -> binary, VectorUDAFMinLong(col 5:int) -> int, VectorUDAFMaxLong(col 5:int) -> int, VectorUDAFCount(col 5:int) -> bigint, VectorUDAFComputeBitVectorLong(col 5:int) -> binary, VectorUDAFMinDouble(col 7:double) -> double, VectorUDAFMaxDouble(col 7:double) -> double, VectorUDAFCount(col 7:double) -> bigint, VectorUDAFComputeBitVectorDouble(col 7:double) -> binary
className: VectorGroupByOperator
groupByMode: HASH
native: false
vectorProcessingMode: HASH
projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24]
minReductionHashAggr: 0.96153843
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24
Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
null sort order:
sort order:
Reduce Sink Vectorization:
className: VectorReduceSinkEmptyKeyOperator
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
valueColumns: 0:int, 1:struct<count:bigint,sum:double,input:int>, 2:bigint, 3:bigint, 4:binary, 5:int, 6:struct<count:bigint,sum:double,input:int>, 7:bigint, 8:binary, 9:int, 10:int, 11:bigint, 12:binary, 13:int, 14:int, 15:bigint, 16:binary, 17:int, 18:int, 19:bigint, 20:binary, 21:double, 22:double, 23:bigint, 24:binary
Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col0 (type: int), _col1 (type: struct<count:bigint,sum:double,input:int>), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct<count:bigint,sum:double,input:int>), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: int), _col11 (type: bigint), _col12 (type: binary), _col13 (type: int), _col14 (type: int), _col15 (type: bigint), _col16 (type: binary), _col17 (type: int), _col18 (type: int), _col19 (type: bigint), _col20 (type: binary), _col21 (type: double), _col22 (type: double), _col23 (type: bigint), _col24 (type: binary)
Reducer 4
Execution mode: vectorized, llap
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez] IS true
reduceColumnNullOrder:
reduceColumnSortOrder:
allNative: false
usesVectorUDFAdaptor: true
vectorized: true
rowBatchContext:
dataColumnCount: 25
dataColumns: VALUE._col0:int, VALUE._col1:struct<count:bigint,sum:double,input:int>, VALUE._col2:bigint, VALUE._col3:bigint, VALUE._col4:binary, VALUE._col5:int, VALUE._col6:struct<count:bigint,sum:double,input:int>, VALUE._col7:bigint, VALUE._col8:binary, VALUE._col9:int, VALUE._col10:int, VALUE._col11:bigint, VALUE._col12:binary, VALUE._col13:int, VALUE._col14:int, VALUE._col15:bigint, VALUE._col16:binary, VALUE._col17:int, VALUE._col18:int, VALUE._col19:bigint, VALUE._col20:binary, VALUE._col21:double, VALUE._col22:double, VALUE._col23:bigint, VALUE._col24:binary
partitionColumnCount: 0
scratchColumnTypeNames: []
Reduce Operator Tree:
Group By Operator
aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector_hll(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector_hll(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector_hll(VALUE._col12), min(VALUE._col13), max(VALUE._col14), count(VALUE._col15), compute_bit_vector_hll(VALUE._col16), min(VALUE._col17), max(VALUE._col18), count(VALUE._col19), compute_bit_vector_hll(VALUE._col20), min(VALUE._col21), max(VALUE._col22), count(VALUE._col23), compute_bit_vector_hll(VALUE._col24)
Group By Vectorization:
aggregators: VectorUDAFMaxLong(col 0:int) -> int, VectorUDAFAvgFinal(col 1:struct<count:bigint,sum:double,input:int>) -> double, VectorUDAFCountMerge(col 2:bigint) -> bigint, VectorUDAFCountMerge(col 3:bigint) -> bigint, VectorUDAFComputeBitVectorFinal(col 4:binary) -> binary, VectorUDAFMaxLong(col 5:int) -> int, VectorUDAFAvgFinal(col 6:struct<count:bigint,sum:double,input:int>) -> double, VectorUDAFCountMerge(col 7:bigint) -> bigint, VectorUDAFComputeBitVectorFinal(col 8:binary) -> binary, VectorUDAFMinLong(col 9:int) -> int, VectorUDAFMaxLong(col 10:int) -> int, VectorUDAFCountMerge(col 11:bigint) -> bigint, VectorUDAFComputeBitVectorFinal(col 12:binary) -> binary, VectorUDAFMinLong(col 13:int) -> int, VectorUDAFMaxLong(col 14:int) -> int, VectorUDAFCountMerge(col 15:bigint) -> bigint, VectorUDAFComputeBitVectorFinal(col 16:binary) -> binary, VectorUDAFMinLong(col 17:int) -> int, VectorUDAFMaxLong(col 18:int) -> int, VectorUDAFCountMerge(col 19:bigint) -> bigint, VectorUDAFComputeBitVectorFinal(col 20:binary) -> binary, VectorUDAFMinDouble(col 21:double) -> double, VectorUDAFMaxDouble(col 22:double) -> double, VectorUDAFCountMerge(col 23:bigint) -> bigint, VectorUDAFComputeBitVectorFinal(col 24:binary) -> binary
className: VectorGroupByOperator
groupByMode: MERGEPARTIAL
native: false
vectorProcessingMode: GLOBAL
projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24]
mode: mergepartial
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24
Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'LONG' (type: string), UDFToLong(_col9) (type: bigint), UDFToLong(_col10) (type: bigint), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary), 'LONG' (type: string), UDFToLong(_col13) (type: bigint), UDFToLong(_col14) (type: bigint), (_col2 - _col15) (type: bigint), COALESCE(ndv_compute_bit_vector(_col16),0) (type: bigint), _col16 (type: binary), 'LONG' (type: string), UDFToLong(_col17) (type: bigint), UDFToLong(_col18) (type: bigint), (_col2 - _col19) (type: bigint), COALESCE(ndv_compute_bit_vector(_col20),0) (type: bigint), _col20 (type: binary), 'DOUBLE' (type: string), _col21 (type: double), _col22 (type: double), (_col2 - _col23) (type: bigint), COALESCE(ndv_compute_bit_vector(_col24),0) (type: bigint), _col24 (type: binary)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35
Select Vectorization:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [25, 27, 29, 30, 33, 4, 34, 36, 38, 39, 42, 8, 43, 9, 10, 44, 47, 12, 48, 13, 14, 49, 52, 16, 53, 17, 18, 54, 57, 20, 58, 21, 22, 59, 62, 24]
selectExpressions: ConstantVectorExpression(val STRING) -> 25:string, VectorCoalesce(columns [0, 26])(children: col 0:int, ConstantVectorExpression(val 0) -> 26:int) -> 27:int, VectorCoalesce(columns [1, 28])(children: col 1:double, ConstantVectorExpression(val 0.0) -> 28:double) -> 29:double, LongColSubtractLongColumn(col 2:bigint, col 3:bigint) -> 30:bigint, VectorCoalesce(columns [31, 32])(children: VectorUDFAdaptor(ndv_compute_bit_vector(_col4)) -> 31:bigint, ConstantVectorExpression(val 0) -> 32:bigint) -> 33:bigint, ConstantVectorExpression(val STRING) -> 34:string, VectorCoalesce(columns [5, 35])(children: col 5:int, ConstantVectorExpression(val 0) -> 35:int) -> 36:int, VectorCoalesce(columns [6, 37])(children: col 6:double, ConstantVectorExpression(val 0.0) -> 37:double) -> 38:double, LongColSubtractLongColumn(col 2:bigint, col 7:bigint) -> 39:bigint, VectorCoalesce(columns [40, 41])(children: VectorUDFAdaptor(ndv_compute_bit_vector(_col8)) -> 40:bigint, ConstantVectorExpression(val 0) -> 41:bigint) -> 42:bigint, ConstantVectorExpression(val LONG) -> 43:string, LongColSubtractLongColumn(col 2:bigint, col 11:bigint) -> 44:bigint, VectorCoalesce(columns [45, 46])(children: VectorUDFAdaptor(ndv_compute_bit_vector(_col12)) -> 45:bigint, ConstantVectorExpression(val 0) -> 46:bigint) -> 47:bigint, ConstantVectorExpression(val LONG) -> 48:string, LongColSubtractLongColumn(col 2:bigint, col 15:bigint) -> 49:bigint, VectorCoalesce(columns [50, 51])(children: VectorUDFAdaptor(ndv_compute_bit_vector(_col16)) -> 50:bigint, ConstantVectorExpression(val 0) -> 51:bigint) -> 52:bigint, ConstantVectorExpression(val LONG) -> 53:string, LongColSubtractLongColumn(col 2:bigint, col 19:bigint) -> 54:bigint, VectorCoalesce(columns [55, 56])(children: VectorUDFAdaptor(ndv_compute_bit_vector(_col20)) -> 55:bigint, ConstantVectorExpression(val 0) -> 56:bigint) -> 57:bigint, ConstantVectorExpression(val DOUBLE) -> 58:string, LongColSubtractLongColumn(col 2:bigint, col 23:bigint) -> 59:bigint, VectorCoalesce(columns [60, 61])(children: VectorUDFAdaptor(ndv_compute_bit_vector(_col24)) -> 60:bigint, ConstantVectorExpression(val 0) -> 61:bigint) -> 62:bigint
Statistics: Num rows: 1 Data size: 1590 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
Statistics: Num rows: 1 Data size: 1590 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Reducer 5
Execution mode: vectorized, llap
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez] IS true
reduceColumnNullOrder: az
reduceColumnSortOrder: ++
allNative: true
usesVectorUDFAdaptor: false
vectorized: true
rowBatchContext:
dataColumnCount: 3
dataColumns: KEY.reducesinkkey0:string, KEY.reducesinkkey1:int, VALUE._col1:string
partitionColumnCount: 0
scratchColumnTypeNames: [bigint]
Reduce Operator Tree:
Select Operator
expressions: VALUE._col1 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: int)
outputColumnNames: _col1, _col2, _col5
Select Vectorization:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [2, 0, 1]
Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
PTF Operator
Function definitions:
Input definition
input alias: ptf_0
output shape: _col1: string, _col2: string, _col5: int
type: WINDOWING
Windowing table definition
input alias: ptf_1
name: windowingtablefunction
order by: _col5 ASC NULLS LAST
partition by: _col2
raw input shape:
window functions:
window function definition
alias: sum_window_0
arguments: _col5
name: sum
window function: GenericUDAFSumLong
window frame: RANGE PRECEDING(5)~CURRENT
PTF Vectorization:
allEvaluatorsAreStreaming: false
className: VectorPTFOperator
evaluatorClasses: [VectorPTFEvaluatorLongSum]
functionInputExpressions: [col 1:int]
functionNames: [sum]
keyInputColumns: [0, 1]
native: true
nonKeyInputColumns: [2]
orderExpressions: [col 1:int]
outputColumns: [3, 2, 0, 1]
outputTypes: [bigint, string, string, int]
partitionExpressions: [col 0:string]
streamingColumns: []
Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: sum_window_0 (type: bigint), _col1 (type: string), _col2 (type: string), _col5 (type: int)
outputColumnNames: sum_window_0, _col1, _col2, _col5
Select Vectorization:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [3, 2, 0, 1]
Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col2 (type: string), _col1 (type: string)
null sort order: zz
sort order: ++
Map-reduce partition columns: _col2 (type: string)
Reduce Sink Vectorization:
className: VectorReduceSinkObjectHashOperator
keyColumns: 0:string, 2:string
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
partitionColumns: 0:string
valueColumns: 3:bigint, 1:int
Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: sum_window_0 (type: bigint), _col5 (type: int)
Reducer 6
Execution mode: llap
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez] IS true
notVectorizedReason: PTF operator: cume_dist not in supported functions [avg, count, dense_rank, first_value, lag, last_value, lead, max, min, rank, row_number, sum]
vectorized: false
Reduce Operator Tree:
Select Operator
expressions: VALUE._col0 (type: bigint), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col4 (type: int)
outputColumnNames: _col0, _col2, _col3, _col6
Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE
PTF Operator
Function definitions:
Input definition
input alias: ptf_0
output shape: _col0: bigint, _col2: string, _col3: string, _col6: int
type: WINDOWING
Windowing table definition
input alias: ptf_1
name: windowingtablefunction
order by: _col3 ASC NULLS LAST, _col2 ASC NULLS LAST
partition by: _col3
raw input shape:
window functions:
window function definition
alias: rank_window_1
arguments: _col3, _col2
name: rank
window function: GenericUDAFRankEvaluator
window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
alias: dense_rank_window_2
arguments: _col3, _col2
name: dense_rank
window function: GenericUDAFDenseRankEvaluator
window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
alias: cume_dist_window_3
arguments: _col3, _col2
name: cume_dist
window function: GenericUDAFCumeDistEvaluator
window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
alias: first_value_window_4
arguments: _col6, true
name: first_value
window function: GenericUDAFFirstValueEvaluator
window frame: ROWS PRECEDING(2)~FOLLOWING(2)
Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col3 (type: string), _col2 (type: string), _col6 (type: int), UDFToInteger(round(_col0, 1)) (type: int), rank_window_1 (type: int), dense_rank_window_2 (type: int), cume_dist_window_3 (type: double), first_value_window_4 (type: int)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
Statistics: Num rows: 26 Data size: 6422 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
Statistics: Num rows: 26 Data size: 6422 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.part_5
Select Operator
expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: double), _col7 (type: int)
outputColumnNames: p_mfgr, p_name, p_size, s2, r, dr, cud, fv1
Statistics: Num rows: 26 Data size: 6422 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: max(length(p_mfgr)), avg(COALESCE(length(p_mfgr),0)), count(1), count(p_mfgr), compute_bit_vector_hll(p_mfgr), max(length(p_name)), avg(COALESCE(length(p_name),0)), count(p_name), compute_bit_vector_hll(p_name), min(p_size), max(p_size), count(p_size), compute_bit_vector_hll(p_size), min(s2), max(s2), count(s2), compute_bit_vector_hll(s2), min(r), max(r), count(r), compute_bit_vector_hll(r), min(dr), max(dr), count(dr), compute_bit_vector_hll(dr), min(cud), max(cud), count(cud), compute_bit_vector_hll(cud), min(fv1), max(fv1), count(fv1), compute_bit_vector_hll(fv1)
minReductionHashAggr: 0.96153843
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32
Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
null sort order:
sort order:
Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col0 (type: int), _col1 (type: struct<count:bigint,sum:double,input:int>), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct<count:bigint,sum:double,input:int>), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: int), _col11 (type: bigint), _col12 (type: binary), _col13 (type: int), _col14 (type: int), _col15 (type: bigint), _col16 (type: binary), _col17 (type: int), _col18 (type: int), _col19 (type: bigint), _col20 (type: binary), _col21 (type: int), _col22 (type: int), _col23 (type: bigint), _col24 (type: binary), _col25 (type: double), _col26 (type: double), _col27 (type: bigint), _col28 (type: binary), _col29 (type: int), _col30 (type: int), _col31 (type: bigint), _col32 (type: binary)
Reducer 7
Execution mode: vectorized, llap
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez] IS true
reduceColumnNullOrder:
reduceColumnSortOrder:
allNative: false
usesVectorUDFAdaptor: true
vectorized: true
rowBatchContext:
dataColumnCount: 33
dataColumns: VALUE._col0:int, VALUE._col1:struct<count:bigint,sum:double,input:int>, VALUE._col2:bigint, VALUE._col3:bigint, VALUE._col4:binary, VALUE._col5:int, VALUE._col6:struct<count:bigint,sum:double,input:int>, VALUE._col7:bigint, VALUE._col8:binary, VALUE._col9:int, VALUE._col10:int, VALUE._col11:bigint, VALUE._col12:binary, VALUE._col13:int, VALUE._col14:int, VALUE._col15:bigint, VALUE._col16:binary, VALUE._col17:int, VALUE._col18:int, VALUE._col19:bigint, VALUE._col20:binary, VALUE._col21:int, VALUE._col22:int, VALUE._col23:bigint, VALUE._col24:binary, VALUE._col25:double, VALUE._col26:double, VALUE._col27:bigint, VALUE._col28:binary, VALUE._col29:int, VALUE._col30:int, VALUE._col31:bigint, VALUE._col32:binary
partitionColumnCount: 0
scratchColumnTypeNames: []
Reduce Operator Tree:
Group By Operator
aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector_hll(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector_hll(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector_hll(VALUE._col12), min(VALUE._col13), max(VALUE._col14), count(VALUE._col15), compute_bit_vector_hll(VALUE._col16), min(VALUE._col17), max(VALUE._col18), count(VALUE._col19), compute_bit_vector_hll(VALUE._col20), min(VALUE._col21), max(VALUE._col22), count(VALUE._col23), compute_bit_vector_hll(VALUE._col24), min(VALUE._col25), max(VALUE._col26), count(VALUE._col27), compute_bit_vector_hll(VALUE._col28), min(VALUE._col29), max(VALUE._col30), count(VALUE._col31), compute_bit_vector_hll(VALUE._col32)
Group By Vectorization:
aggregators: VectorUDAFMaxLong(col 0:int) -> int, VectorUDAFAvgFinal(col 1:struct<count:bigint,sum:double,input:int>) -> double, VectorUDAFCountMerge(col 2:bigint) -> bigint, VectorUDAFCountMerge(col 3:bigint) -> bigint, VectorUDAFComputeBitVectorFinal(col 4:binary) -> binary, VectorUDAFMaxLong(col 5:int) -> int, VectorUDAFAvgFinal(col 6:struct<count:bigint,sum:double,input:int>) -> double, VectorUDAFCountMerge(col 7:bigint) -> bigint, VectorUDAFComputeBitVectorFinal(col 8:binary) -> binary, VectorUDAFMinLong(col 9:int) -> int, VectorUDAFMaxLong(col 10:int) -> int, VectorUDAFCountMerge(col 11:bigint) -> bigint, VectorUDAFComputeBitVectorFinal(col 12:binary) -> binary, VectorUDAFMinLong(col 13:int) -> int, VectorUDAFMaxLong(col 14:int) -> int, VectorUDAFCountMerge(col 15:bigint) -> bigint, VectorUDAFComputeBitVectorFinal(col 16:binary) -> binary, VectorUDAFMinLong(col 17:int) -> int, VectorUDAFMaxLong(col 18:int) -> int, VectorUDAFCountMerge(col 19:bigint) -> bigint, VectorUDAFComputeBitVectorFinal(col 20:binary) -> binary, VectorUDAFMinLong(col 21:int) -> int, VectorUDAFMaxLong(col 22:int) -> int, VectorUDAFCountMerge(col 23:bigint) -> bigint, VectorUDAFComputeBitVectorFinal(col 24:binary) -> binary, VectorUDAFMinDouble(col 25:double) -> double, VectorUDAFMaxDouble(col 26:double) -> double, VectorUDAFCountMerge(col 27:bigint) -> bigint, VectorUDAFComputeBitVectorFinal(col 28:binary) -> binary, VectorUDAFMinLong(col 29:int) -> int, VectorUDAFMaxLong(col 30:int) -> int, VectorUDAFCountMerge(col 31:bigint) -> bigint, VectorUDAFComputeBitVectorFinal(col 32:binary) -> binary
className: VectorGroupByOperator
groupByMode: MERGEPARTIAL
native: false
vectorProcessingMode: GLOBAL
projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32]
mode: mergepartial
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32
Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'LONG' (type: string), UDFToLong(_col9) (type: bigint), UDFToLong(_col10) (type: bigint), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary), 'LONG' (type: string), UDFToLong(_col13) (type: bigint), UDFToLong(_col14) (type: bigint), (_col2 - _col15) (type: bigint), COALESCE(ndv_compute_bit_vector(_col16),0) (type: bigint), _col16 (type: binary), 'LONG' (type: string), UDFToLong(_col17) (type: bigint), UDFToLong(_col18) (type: bigint), (_col2 - _col19) (type: bigint), COALESCE(ndv_compute_bit_vector(_col20),0) (type: bigint), _col20 (type: binary), 'LONG' (type: string), UDFToLong(_col21) (type: bigint), UDFToLong(_col22) (type: bigint), (_col2 - _col23) (type: bigint), COALESCE(ndv_compute_bit_vector(_col24),0) (type: bigint), _col24 (type: binary), 'DOUBLE' (type: string), _col25 (type: double), _col26 (type: double), (_col2 - _col27) (type: bigint), COALESCE(ndv_compute_bit_vector(_col28),0) (type: bigint), _col28 (type: binary), 'LONG' (type: string), UDFToLong(_col29) (type: bigint), UDFToLong(_col30) (type: bigint), (_col2 - _col31) (type: bigint), COALESCE(ndv_compute_bit_vector(_col32),0) (type: bigint), _col32 (type: binary)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35, _col36, _col37, _col38, _col39, _col40, _col41, _col42, _col43, _col44, _col45, _col46, _col47
Select Vectorization:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [33, 35, 37, 38, 41, 4, 42, 44, 46, 47, 50, 8, 51, 9, 10, 52, 55, 12, 56, 13, 14, 57, 60, 16, 61, 17, 18, 62, 65, 20, 66, 21, 22, 67, 70, 24, 71, 25, 26, 72, 75, 28, 76, 29, 30, 77, 80, 32]
selectExpressions: ConstantVectorExpression(val STRING) -> 33:string, VectorCoalesce(columns [0, 34])(children: col 0:int, ConstantVectorExpression(val 0) -> 34:int) -> 35:int, VectorCoalesce(columns [1, 36])(children: col 1:double, ConstantVectorExpression(val 0.0) -> 36:double) -> 37:double, LongColSubtractLongColumn(col 2:bigint, col 3:bigint) -> 38:bigint, VectorCoalesce(columns [39, 40])(children: VectorUDFAdaptor(ndv_compute_bit_vector(_col4)) -> 39:bigint, ConstantVectorExpression(val 0) -> 40:bigint) -> 41:bigint, ConstantVectorExpression(val STRING) -> 42:string, VectorCoalesce(columns [5, 43])(children: col 5:int, ConstantVectorExpression(val 0) -> 43:int) -> 44:int, VectorCoalesce(columns [6, 45])(children: col 6:double, ConstantVectorExpression(val 0.0) -> 45:double) -> 46:double, LongColSubtractLongColumn(col 2:bigint, col 7:bigint) -> 47:bigint, VectorCoalesce(columns [48, 49])(children: VectorUDFAdaptor(ndv_compute_bit_vector(_col8)) -> 48:bigint, ConstantVectorExpression(val 0) -> 49:bigint) -> 50:bigint, ConstantVectorExpression(val LONG) -> 51:string, LongColSubtractLongColumn(col 2:bigint, col 11:bigint) -> 52:bigint, VectorCoalesce(columns [53, 54])(children: VectorUDFAdaptor(ndv_compute_bit_vector(_col12)) -> 53:bigint, ConstantVectorExpression(val 0) -> 54:bigint) -> 55:bigint, ConstantVectorExpression(val LONG) -> 56:string, LongColSubtractLongColumn(col 2:bigint, col 15:bigint) -> 57:bigint, VectorCoalesce(columns [58, 59])(children: VectorUDFAdaptor(ndv_compute_bit_vector(_col16)) -> 58:bigint, ConstantVectorExpression(val 0) -> 59:bigint) -> 60:bigint, ConstantVectorExpression(val LONG) -> 61:string, LongColSubtractLongColumn(col 2:bigint, col 19:bigint) -> 62:bigint, VectorCoalesce(columns [63, 64])(children: VectorUDFAdaptor(ndv_compute_bit_vector(_col20)) -> 63:bigint, ConstantVectorExpression(val 0) -> 64:bigint) -> 65:bigint, ConstantVectorExpression(val LONG) -> 66:string, LongColSubtractLongColumn(col 2:bigint, col 23:bigint) -> 67:bigint, VectorCoalesce(columns [68, 69])(children: VectorUDFAdaptor(ndv_compute_bit_vector(_col24)) -> 68:bigint, ConstantVectorExpression(val 0) -> 69:bigint) -> 70:bigint, ConstantVectorExpression(val DOUBLE) -> 71:string, LongColSubtractLongColumn(col 2:bigint, col 27:bigint) -> 72:bigint, VectorCoalesce(columns [73, 74])(children: VectorUDFAdaptor(ndv_compute_bit_vector(_col28)) -> 73:bigint, ConstantVectorExpression(val 0) -> 74:bigint) -> 75:bigint, ConstantVectorExpression(val LONG) -> 76:string, LongColSubtractLongColumn(col 2:bigint, col 31:bigint) -> 77:bigint, VectorCoalesce(columns [78, 79])(children: VectorUDFAdaptor(ndv_compute_bit_vector(_col32)) -> 78:bigint, ConstantVectorExpression(val 0) -> 79:bigint) -> 80:bigint
Statistics: Num rows: 1 Data size: 2118 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
Statistics: Num rows: 1 Data size: 2118 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-3
Dependency Collection
Stage: Stage-0
Move Operator
tables:
replace: true
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.part_4
Stage: Stage-4
Stats Work
Basic Stats Work:
Column Stats Desc:
Columns: p_mfgr, p_name, p_size, r, dr, s
Column Types: string, string, int, int, int, double
Table: default.part_4
Stage: Stage-1
Move Operator
tables:
replace: true
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.part_5
Stage: Stage-5
Stats Work
Basic Stats Work:
Column Stats Desc:
Columns: p_mfgr, p_name, p_size, s2, r, dr, cud, fv1
Column Types: string, string, int, int, int, int, double, int
Table: default.part_5
PREHOOK: query: from noop(on part_orc
partition by p_mfgr
order by p_name)
INSERT OVERWRITE TABLE part_4 select p_mfgr, p_name, p_size,
rank() over (distribute by p_mfgr sort by p_name) as r,
dense_rank() over (distribute by p_mfgr sort by p_name) as dr,
round(sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows between unbounded preceding and current row),2) as s
INSERT OVERWRITE TABLE part_5 select p_mfgr,p_name, p_size,
round(sum(p_size) over (distribute by p_mfgr sort by p_size range between 5 preceding and current row),1) as s2,
rank() over (distribute by p_mfgr sort by p_mfgr, p_name) as r,
dense_rank() over (distribute by p_mfgr sort by p_mfgr, p_name) as dr,
cume_dist() over (distribute by p_mfgr sort by p_mfgr, p_name) as cud,
first_value(p_size, true) over w1 as fv1
window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following)
PREHOOK: type: QUERY
PREHOOK: Input: default@part_orc
PREHOOK: Output: default@part_4
PREHOOK: Output: default@part_5
POSTHOOK: query: from noop(on part_orc
partition by p_mfgr
order by p_name)
INSERT OVERWRITE TABLE part_4 select p_mfgr, p_name, p_size,
rank() over (distribute by p_mfgr sort by p_name) as r,
dense_rank() over (distribute by p_mfgr sort by p_name) as dr,
round(sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows between unbounded preceding and current row),2) as s
INSERT OVERWRITE TABLE part_5 select p_mfgr,p_name, p_size,
round(sum(p_size) over (distribute by p_mfgr sort by p_size range between 5 preceding and current row),1) as s2,
rank() over (distribute by p_mfgr sort by p_mfgr, p_name) as r,
dense_rank() over (distribute by p_mfgr sort by p_mfgr, p_name) as dr,
cume_dist() over (distribute by p_mfgr sort by p_mfgr, p_name) as cud,
first_value(p_size, true) over w1 as fv1
window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following)
POSTHOOK: type: QUERY
POSTHOOK: Input: default@part_orc
POSTHOOK: Output: default@part_4
POSTHOOK: Output: default@part_5
POSTHOOK: Lineage: part_4.dr SCRIPT [(part_orc)part_orc.FieldSchema(name:p_partkey, type:int, comment:null), (part_orc)part_orc.FieldSchema(name:p_name, type:string, comment:null), (part_orc)part_orc.FieldSchema(name:p_mfgr, type:string, comment:null), (part_orc)part_orc.FieldSchema(name:p_brand, type:string, comment:null), (part_orc)part_orc.FieldSchema(name:p_type, type:string, comment:null), (part_orc)part_orc.FieldSchema(name:p_size, type:int, comment:null), (part_orc)part_orc.FieldSchema(name:p_container, type:string, comment:null), (part_orc)part_orc.FieldSchema(name:p_retailprice, type:double, comment:null), (part_orc)part_orc.FieldSchema(name:p_comment, type:string, comment:null), ]
POSTHOOK: Lineage: part_4.p_mfgr SCRIPT [(part_orc)part_orc.FieldSchema(name:p_partkey, type:int, comment:null), (part_orc)part_orc.FieldSchema(name:p_name, type:string, comment:null), (part_orc)part_orc.FieldSchema(name:p_mfgr, type:string, comment:null), (part_orc)part_orc.FieldSchema(name:p_brand, type:string, comment:null), (part_orc)part_orc.FieldSchema(name:p_type, type:string, comment:null), (part_orc)part_orc.FieldSchema(name:p_size, type:int, comment:null), (part_orc)part_orc.FieldSchema(name:p_container, type:string, comment:null), (part_orc)part_orc.FieldSchema(name:p_retailprice, type:double, comment:null), (part_orc)part_orc.FieldSchema(name:p_comment, type:string, comment:null), ]
POSTHOOK: Lineage: part_4.p_name SCRIPT [(part_orc)part_orc.FieldSchema(name:p_partkey, type:int, comment:null), (part_orc)part_orc.FieldSchema(name:p_name, type:string, comment:null), (part_orc)part_orc.FieldSchema(name:p_mfgr, type:string, comment:null), (part_orc)part_orc.FieldSchema(name:p_brand, type:string, comment:null), (part_orc)part_orc.FieldSchema(name:p_type, type:string, comment:null), (part_orc)part_orc.FieldSchema(name:p_size, type:int, comment:null), (part_orc)part_orc.FieldSchema(name:p_container, type:string, comment:null), (part_orc)part_orc.FieldSchema(name:p_retailprice, type:double, comment:null), (part_orc)part_orc.FieldSchema(name:p_comment, type:string, comment:null), ]
POSTHOOK: Lineage: part_4.p_size SCRIPT [(part_orc)part_orc.FieldSchema(name:p_partkey, type:int, comment:null), (part_orc)part_orc.FieldSchema(name:p_name, type:string, comment:null), (part_orc)part_orc.FieldSchema(name:p_mfgr, type:string, comment:null), (part_orc)part_orc.FieldSchema(name:p_brand, type:string, comment:null), (part_orc)part_orc.FieldSchema(name:p_type, type:string, comment:null), (part_orc)part_orc.FieldSchema(name:p_size, type:int, comment:null), (part_orc)part_orc.FieldSchema(name:p_container, type:string, comment:null), (part_orc)part_orc.FieldSchema(name:p_retailprice, type:double, comment:null), (part_orc)part_orc.FieldSchema(name:p_comment, type:string, comment:null), ]
POSTHOOK: Lineage: part_4.r SCRIPT [(part_orc)part_orc.FieldSchema(name:p_partkey, type:int, comment:null), (part_orc)part_orc.FieldSchema(name:p_name, type:string, comment:null), (part_orc)part_orc.FieldSchema(name:p_mfgr, type:string, comment:null), (part_orc)part_orc.FieldSchema(name:p_brand, type:string, comment:null), (part_orc)part_orc.FieldSchema(name:p_type, type:string, comment:null), (part_orc)part_orc.FieldSchema(name:p_size, type:int, comment:null), (part_orc)part_orc.FieldSchema(name:p_container, type:string, comment:null), (part_orc)part_orc.FieldSchema(name:p_retailprice, type:double, comment:null), (part_orc)part_orc.FieldSchema(name:p_comment, type:string, comment:null), ]
POSTHOOK: Lineage: part_4.s SCRIPT [(part_orc)part_orc.FieldSchema(name:p_partkey, type:int, comment:null), (part_orc)part_orc.FieldSchema(name:p_name, type:string, comment:null), (part_orc)part_orc.FieldSchema(name:p_mfgr, type:string, comment:null), (part_orc)part_orc.FieldSchema(name:p_brand, type:string, comment:null), (part_orc)part_orc.FieldSchema(name:p_type, type:string, comment:null), (part_orc)part_orc.FieldSchema(name:p_size, type:int, comment:null), (part_orc)part_orc.FieldSchema(name:p_container, type:string, comment:null), (part_orc)part_orc.FieldSchema(name:p_retailprice, type:double, comment:null), (part_orc)part_orc.FieldSchema(name:p_comment, type:string, comment:null), ]
POSTHOOK: Lineage: part_5.cud SCRIPT [(part_orc)part_orc.FieldSchema(name:p_partkey, type:int, comment:null), (part_orc)part_orc.FieldSchema(name:p_name, type:string, comment:null), (part_orc)part_orc.FieldSchema(name:p_mfgr, type:string, comment:null), (part_orc)part_orc.FieldSchema(name:p_brand, type:string, comment:null), (part_orc)part_orc.FieldSchema(name:p_type, type:string, comment:null), (part_orc)part_orc.FieldSchema(name:p_size, type:int, comment:null), (part_orc)part_orc.FieldSchema(name:p_container, type:string, comment:null), (part_orc)part_orc.FieldSchema(name:p_retailprice, type:double, comment:null), (part_orc)part_orc.FieldSchema(name:p_comment, type:string, comment:null), ]
POSTHOOK: Lineage: part_5.dr SCRIPT [(part_orc)part_orc.FieldSchema(name:p_partkey, type:int, comment:null), (part_orc)part_orc.FieldSchema(name:p_name, type:string, comment:null), (part_orc)part_orc.FieldSchema(name:p_mfgr, type:string, comment:null), (part_orc)part_orc.FieldSchema(name:p_brand, type:string, comment:null), (part_orc)part_orc.FieldSchema(name:p_type, type:string, comment:null), (part_orc)part_orc.FieldSchema(name:p_size, type:int, comment:null), (part_orc)part_orc.FieldSchema(name:p_container, type:string, comment:null), (part_orc)part_orc.FieldSchema(name:p_retailprice, type:double, comment:null), (part_orc)part_orc.FieldSchema(name:p_comment, type:string, comment:null), ]
POSTHOOK: Lineage: part_5.fv1 SCRIPT [(part_orc)part_orc.FieldSchema(name:p_partkey, type:int, comment:null), (part_orc)part_orc.FieldSchema(name:p_name, type:string, comment:null), (part_orc)part_orc.FieldSchema(name:p_mfgr, type:string, comment:null), (part_orc)part_orc.FieldSchema(name:p_brand, type:string, comment:null), (part_orc)part_orc.FieldSchema(name:p_type, type:string, comment:null), (part_orc)part_orc.FieldSchema(name:p_size, type:int, comment:null), (part_orc)part_orc.FieldSchema(name:p_container, type:string, comment:null), (part_orc)part_orc.FieldSchema(name:p_retailprice, type:double, comment:null), (part_orc)part_orc.FieldSchema(name:p_comment, type:string, comment:null), ]
POSTHOOK: Lineage: part_5.p_mfgr SCRIPT [(part_orc)part_orc.FieldSchema(name:p_partkey, type:int, comment:null), (part_orc)part_orc.FieldSchema(name:p_name, type:string, comment:null), (part_orc)part_orc.FieldSchema(name:p_mfgr, type:string, comment:null), (part_orc)part_orc.FieldSchema(name:p_brand, type:string, comment:null), (part_orc)part_orc.FieldSchema(name:p_type, type:string, comment:null), (part_orc)part_orc.FieldSchema(name:p_size, type:int, comment:null), (part_orc)part_orc.FieldSchema(name:p_container, type:string, comment:null), (part_orc)part_orc.FieldSchema(name:p_retailprice, type:double, comment:null), (part_orc)part_orc.FieldSchema(name:p_comment, type:string, comment:null), ]
POSTHOOK: Lineage: part_5.p_name SCRIPT [(part_orc)part_orc.FieldSchema(name:p_partkey, type:int, comment:null), (part_orc)part_orc.FieldSchema(name:p_name, type:string, comment:null), (part_orc)part_orc.FieldSchema(name:p_mfgr, type:string, comment:null), (part_orc)part_orc.FieldSchema(name:p_brand, type:string, comment:null), (part_orc)part_orc.FieldSchema(name:p_type, type:string, comment:null), (part_orc)part_orc.FieldSchema(name:p_size, type:int, comment:null), (part_orc)part_orc.FieldSchema(name:p_container, type:string, comment:null), (part_orc)part_orc.FieldSchema(name:p_retailprice, type:double, comment:null), (part_orc)part_orc.FieldSchema(name:p_comment, type:string, comment:null), ]
POSTHOOK: Lineage: part_5.p_size SCRIPT [(part_orc)part_orc.FieldSchema(name:p_partkey, type:int, comment:null), (part_orc)part_orc.FieldSchema(name:p_name, type:string, comment:null), (part_orc)part_orc.FieldSchema(name:p_mfgr, type:string, comment:null), (part_orc)part_orc.FieldSchema(name:p_brand, type:string, comment:null), (part_orc)part_orc.FieldSchema(name:p_type, type:string, comment:null), (part_orc)part_orc.FieldSchema(name:p_size, type:int, comment:null), (part_orc)part_orc.FieldSchema(name:p_container, type:string, comment:null), (part_orc)part_orc.FieldSchema(name:p_retailprice, type:double, comment:null), (part_orc)part_orc.FieldSchema(name:p_comment, type:string, comment:null), ]
POSTHOOK: Lineage: part_5.r SCRIPT [(part_orc)part_orc.FieldSchema(name:p_partkey, type:int, comment:null), (part_orc)part_orc.FieldSchema(name:p_name, type:string, comment:null), (part_orc)part_orc.FieldSchema(name:p_mfgr, type:string, comment:null), (part_orc)part_orc.FieldSchema(name:p_brand, type:string, comment:null), (part_orc)part_orc.FieldSchema(name:p_type, type:string, comment:null), (part_orc)part_orc.FieldSchema(name:p_size, type:int, comment:null), (part_orc)part_orc.FieldSchema(name:p_container, type:string, comment:null), (part_orc)part_orc.FieldSchema(name:p_retailprice, type:double, comment:null), (part_orc)part_orc.FieldSchema(name:p_comment, type:string, comment:null), ]
POSTHOOK: Lineage: part_5.s2 SCRIPT [(part_orc)part_orc.FieldSchema(name:p_partkey, type:int, comment:null), (part_orc)part_orc.FieldSchema(name:p_name, type:string, comment:null), (part_orc)part_orc.FieldSchema(name:p_mfgr, type:string, comment:null), (part_orc)part_orc.FieldSchema(name:p_brand, type:string, comment:null), (part_orc)part_orc.FieldSchema(name:p_type, type:string, comment:null), (part_orc)part_orc.FieldSchema(name:p_size, type:int, comment:null), (part_orc)part_orc.FieldSchema(name:p_container, type:string, comment:null), (part_orc)part_orc.FieldSchema(name:p_retailprice, type:double, comment:null), (part_orc)part_orc.FieldSchema(name:p_comment, type:string, comment:null), ]
PREHOOK: query: select * from part_4
PREHOOK: type: QUERY
PREHOOK: Input: default@part_4
#### A masked pattern was here ####
POSTHOOK: query: select * from part_4
POSTHOOK: type: QUERY
POSTHOOK: Input: default@part_4
#### A masked pattern was here ####
Manufacturer#1 almond antique burnished rose metallic 2 1 1 1173.15
Manufacturer#1 almond antique burnished rose metallic 2 1 1 2346.3
Manufacturer#1 almond antique chartreuse lavender yellow 34 3 2 4100.06
Manufacturer#1 almond antique salmon chartreuse burlywood 6 4 3 5702.65
Manufacturer#1 almond aquamarine burnished black steel 28 5 4 7117.07
Manufacturer#1 almond aquamarine pink moccasin thistle 42 6 5 8749.73
Manufacturer#2 almond antique violet chocolate turquoise 14 1 1 1690.68
Manufacturer#2 almond antique violet turquoise frosted 40 2 2 3491.38
Manufacturer#2 almond aquamarine midnight light salmon 2 3 3 5523.36
Manufacturer#2 almond aquamarine rose maroon antique 25 4 4 7222.02
Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 5 5 8923.62
Manufacturer#3 almond antique chartreuse khaki white 17 1 1 1671.68
Manufacturer#3 almond antique forest lavender goldenrod 14 2 2 2861.95
Manufacturer#3 almond antique metallic orange dim 19 3 3 4272.34
Manufacturer#3 almond antique misty red olive 1 4 4 6195.32
Manufacturer#3 almond antique olive coral navajo 45 5 5 7532.61
Manufacturer#4 almond antique gainsboro frosted violet 10 1 1 1620.67
Manufacturer#4 almond antique violet mint lemon 39 2 2 2996.09
Manufacturer#4 almond aquamarine floral ivory bisque 27 3 3 4202.35
Manufacturer#4 almond aquamarine yellow dodger mint 7 4 4 6047.27
Manufacturer#4 almond azure aquamarine papaya violet 12 5 5 7337.62
Manufacturer#5 almond antique blue firebrick mint 31 1 1 1789.69
Manufacturer#5 almond antique medium spring khaki 6 2 2 3401.35
Manufacturer#5 almond antique sky peru orange 2 3 3 5190.08
Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 6208.18
Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 7672.66
PREHOOK: query: select * from part_5
PREHOOK: type: QUERY
PREHOOK: Input: default@part_5
#### A masked pattern was here ####
POSTHOOK: query: select * from part_5
POSTHOOK: type: QUERY
POSTHOOK: Input: default@part_5
#### A masked pattern was here ####
Manufacturer#1 almond antique burnished rose metallic 2 4 1 1 0.3333333333333333 2
Manufacturer#1 almond antique burnished rose metallic 2 4 1 1 0.3333333333333333 2
Manufacturer#1 almond antique chartreuse lavender yellow 34 34 3 2 0.5 2
Manufacturer#1 almond antique salmon chartreuse burlywood 6 10 4 3 0.6666666666666666 2
Manufacturer#1 almond aquamarine burnished black steel 28 28 5 4 0.8333333333333334 34
Manufacturer#1 almond aquamarine pink moccasin thistle 42 42 6 5 1.0 6
Manufacturer#2 almond antique violet chocolate turquoise 14 14 1 1 0.2 14
Manufacturer#2 almond antique violet turquoise frosted 40 40 2 2 0.4 14
Manufacturer#2 almond aquamarine midnight light salmon 2 2 3 3 0.6 14
Manufacturer#2 almond aquamarine rose maroon antique 25 25 4 4 0.8 40
Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 32 5 5 1.0 2
Manufacturer#3 almond antique chartreuse khaki white 17 31 1 1 0.2 17
Manufacturer#3 almond antique forest lavender goldenrod 14 14 2 2 0.4 17
Manufacturer#3 almond antique metallic orange dim 19 50 3 3 0.6 17
Manufacturer#3 almond antique misty red olive 1 1 4 4 0.8 14
Manufacturer#3 almond antique olive coral navajo 45 45 5 5 1.0 19
Manufacturer#4 almond antique gainsboro frosted violet 10 17 1 1 0.2 10
Manufacturer#4 almond antique violet mint lemon 39 39 2 2 0.4 10
Manufacturer#4 almond aquamarine floral ivory bisque 27 27 3 3 0.6 10
Manufacturer#4 almond aquamarine yellow dodger mint 7 7 4 4 0.8 39
Manufacturer#4 almond azure aquamarine papaya violet 12 29 5 5 1.0 27
Manufacturer#5 almond antique blue firebrick mint 31 31 1 1 0.2 31
Manufacturer#5 almond antique medium spring khaki 6 8 2 2 0.4 31
Manufacturer#5 almond antique sky peru orange 2 2 3 3 0.6 31
Manufacturer#5 almond aquamarine dodger light gainsboro 46 46 4 4 0.8 6
Manufacturer#5 almond azure blanched chiffon midnight 23 23 5 5 1.0 2
PREHOOK: query: explain vectorization detail
select p_mfgr, p_name,
rank() over (partition by p_mfgr,p_name) as r,
dense_rank() over (partition by p_mfgr,p_name) as dr,
p_size, sum(p_size) over (partition by p_mfgr,p_name rows between unbounded preceding and current row) as s1
from noop(on
noopwithmap(on
noop(on
noop(on part_orc
partition by p_mfgr
order by p_mfgr)
)
partition by p_mfgr,p_name
order by p_mfgr,p_name)
partition by p_mfgr,p_name
order by p_mfgr,p_name)
PREHOOK: type: QUERY
PREHOOK: Input: default@part_orc
#### A masked pattern was here ####
POSTHOOK: query: explain vectorization detail
select p_mfgr, p_name,
rank() over (partition by p_mfgr,p_name) as r,
dense_rank() over (partition by p_mfgr,p_name) as dr,
p_size, sum(p_size) over (partition by p_mfgr,p_name rows between unbounded preceding and current row) as s1
from noop(on
noopwithmap(on
noop(on
noop(on part_orc
partition by p_mfgr
order by p_mfgr)
)
partition by p_mfgr,p_name
order by p_mfgr,p_name)
partition by p_mfgr,p_name
order by p_mfgr,p_name)
POSTHOOK: type: QUERY
POSTHOOK: Input: default@part_orc
#### A masked pattern was here ####
PLAN VECTORIZATION:
enabled: true
enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
STAGE PLANS:
Stage: Stage-1
Tez
#### A masked pattern was here ####
Edges:
Reducer 2 <- Map 1 (SIMPLE_EDGE)
Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
Reducer 4 <- Reducer 3 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
TableScan
alias: part_orc
Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
TableScan Vectorization:
native: true
vectorizationSchemaColumns: [0:p_partkey:int, 1:p_name:string, 2:p_mfgr:string, 3:p_brand:string, 4:p_type:string, 5:p_size:int, 6:p_container:string, 7:p_retailprice:double, 8:p_comment:string, 9:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>, 10:ROW__IS__DELETED:boolean]
Reduce Output Operator
key expressions: p_mfgr (type: string)
null sort order: z
sort order: +
Map-reduce partition columns: p_mfgr (type: string)
Reduce Sink Vectorization:
className: VectorReduceSinkStringOperator
keyColumns: 2:string
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
valueColumns: 1:string, 5:int
Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: p_name (type: string), p_size (type: int)
Execution mode: vectorized, llap
LLAP IO: all inputs
Map Vectorization:
enabled: true
enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
inputFormatFeatureSupport: [DECIMAL_64]
featureSupportInUse: [DECIMAL_64]
inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
allNative: true
usesVectorUDFAdaptor: false
vectorized: true
rowBatchContext:
dataColumnCount: 9
includeColumns: [1, 2, 5]
dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string
partitionColumnCount: 0
scratchColumnTypeNames: []
Reducer 2
Execution mode: llap
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez] IS true
notVectorizedReason: PTF operator: NOOP not supported
vectorized: false
Reduce Operator Tree:
Select Operator
expressions: VALUE._col1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col4 (type: int)
outputColumnNames: _col1, _col2, _col5
Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
PTF Operator
Function definitions:
Input definition
input alias: part_orc
output shape: _col1: string, _col2: string, _col5: int
type: TABLE
Partition table definition
input alias: ptf_1
name: noop
order by: _col2 ASC NULLS LAST
output shape: _col1: string, _col2: string, _col5: int
partition by: _col2
raw input shape:
Partition table definition
input alias: ptf_2
name: noop
order by: _col2 ASC NULLS LAST
output shape: _col1: string, _col2: string, _col5: int
partition by: _col2
raw input shape:
Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
PTF Operator
Function definitions:
Input definition
input alias: ptf_0
output shape: _col1: string, _col2: string, _col5: int
type: PTFCOMPONENT
Partition table definition
input alias: ptf_1
name: noopwithmap
order by: _col2 ASC NULLS LAST, _col1 ASC NULLS LAST
output shape: _col1: string, _col2: string, _col5: int
partition by: _col2, _col1
raw input shape:
transforms raw input: true
Partition table definition
input alias: ptf_2
name: noop
order by: _col2 ASC NULLS LAST, _col1 ASC NULLS LAST
output shape: _col1: string, _col2: string, _col5: int
partition by: _col2, _col1
raw input shape:
Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
Map-side function: true
Reduce Output Operator
key expressions: _col2 (type: string), _col1 (type: string)
null sort order: zz
sort order: ++
Map-reduce partition columns: _col2 (type: string), _col1 (type: string)
Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col5 (type: int)
Reducer 3
Execution mode: llap
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez] IS true
notVectorizedReason: PTF operator: NOOP not supported
vectorized: false
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int)
outputColumnNames: _col1, _col2, _col5
Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
PTF Operator
Function definitions:
Input definition
input alias: ptf_0
output shape: _col1: string, _col2: string, _col5: int
type: PTFCOMPONENT
Partition table definition
input alias: ptf_1
name: noopwithmap
order by: _col2 ASC NULLS LAST, _col1 ASC NULLS LAST
output shape: _col1: string, _col2: string, _col5: int
partition by: _col2, _col1
raw input shape:
transforms raw input: true
Partition table definition
input alias: ptf_2
name: noop
order by: _col2 ASC NULLS LAST, _col1 ASC NULLS LAST
output shape: _col1: string, _col2: string, _col5: int
partition by: _col2, _col1
raw input shape:
Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col2 (type: string), _col1 (type: string)
null sort order: aa
sort order: ++
Map-reduce partition columns: _col2 (type: string), _col1 (type: string)
Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col5 (type: int)
Reducer 4
Execution mode: vectorized, llap
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez] IS true
reduceColumnNullOrder: aa
reduceColumnSortOrder: ++
allNative: false
usesVectorUDFAdaptor: false
vectorized: true
rowBatchContext:
dataColumnCount: 3
dataColumns: KEY.reducesinkkey0:string, KEY.reducesinkkey1:string, VALUE._col3:int
partitionColumnCount: 0
scratchColumnTypeNames: [bigint, bigint, bigint]
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int)
outputColumnNames: _col1, _col2, _col5
Select Vectorization:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [1, 0, 2]
Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
PTF Operator
Function definitions:
Input definition
input alias: ptf_0
output shape: _col1: string, _col2: string, _col5: int
type: WINDOWING
Windowing table definition
input alias: ptf_1
name: windowingtablefunction
order by: _col2 ASC NULLS FIRST, _col1 ASC NULLS FIRST
partition by: _col2, _col1
raw input shape:
window functions:
window function definition
alias: rank_window_0
arguments: _col2, _col1
name: rank
window function: GenericUDAFRankEvaluator
window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
alias: dense_rank_window_1
arguments: _col2, _col1
name: dense_rank
window function: GenericUDAFDenseRankEvaluator
window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
alias: sum_window_2
arguments: _col5
name: sum
window function: GenericUDAFSumLong
window frame: ROWS PRECEDING(MAX)~CURRENT
PTF Vectorization:
allEvaluatorsAreStreaming: true
className: VectorPTFOperator
evaluatorClasses: [VectorPTFEvaluatorRank, VectorPTFEvaluatorDenseRank, VectorPTFEvaluatorStreamingLongSum]
functionInputExpressions: [col 0:string, col 0:string, col 2:int]
functionNames: [rank, dense_rank, sum]
keyInputColumns: [1, 0]
native: true
nonKeyInputColumns: [2]
orderExpressions: [col 0:string, col 1:string]
outputColumns: [3, 4, 5, 1, 0, 2]
outputTypes: [int, int, bigint, string, string, int]
partitionExpressions: [col 0:string, col 1:string]
streamingColumns: [3, 4, 5]
Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
Select Vectorization:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [0, 1, 3, 4, 2, 5]
Statistics: Num rows: 26 Data size: 6214 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
Statistics: Num rows: 26 Data size: 6214 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
Fetch Operator
limit: -1
Processor Tree:
ListSink
PREHOOK: query: select p_mfgr, p_name,
rank() over (partition by p_mfgr,p_name) as r,
dense_rank() over (partition by p_mfgr,p_name) as dr,
p_size, sum(p_size) over (partition by p_mfgr,p_name rows between unbounded preceding and current row) as s1
from noop(on
noopwithmap(on
noop(on
noop(on part_orc
partition by p_mfgr
order by p_mfgr)
)
partition by p_mfgr,p_name
order by p_mfgr,p_name)
partition by p_mfgr,p_name
order by p_mfgr,p_name)
PREHOOK: type: QUERY
PREHOOK: Input: default@part_orc
#### A masked pattern was here ####
POSTHOOK: query: select p_mfgr, p_name,
rank() over (partition by p_mfgr,p_name) as r,
dense_rank() over (partition by p_mfgr,p_name) as dr,
p_size, sum(p_size) over (partition by p_mfgr,p_name rows between unbounded preceding and current row) as s1
from noop(on
noopwithmap(on
noop(on
noop(on part_orc
partition by p_mfgr
order by p_mfgr)
)
partition by p_mfgr,p_name
order by p_mfgr,p_name)
partition by p_mfgr,p_name
order by p_mfgr,p_name)
POSTHOOK: type: QUERY
POSTHOOK: Input: default@part_orc
#### A masked pattern was here ####
Manufacturer#1 almond antique burnished rose metallic 1 1 2 2
Manufacturer#1 almond antique burnished rose metallic 1 1 2 4
Manufacturer#1 almond antique chartreuse lavender yellow 1 1 34 34
Manufacturer#1 almond antique salmon chartreuse burlywood 1 1 6 6
Manufacturer#1 almond aquamarine burnished black steel 1 1 28 28
Manufacturer#1 almond aquamarine pink moccasin thistle 1 1 42 42
Manufacturer#2 almond antique violet chocolate turquoise 1 1 14 14
Manufacturer#2 almond antique violet turquoise frosted 1 1 40 40
Manufacturer#2 almond aquamarine midnight light salmon 1 1 2 2
Manufacturer#2 almond aquamarine rose maroon antique 1 1 25 25
Manufacturer#2 almond aquamarine sandy cyan gainsboro 1 1 18 18
Manufacturer#3 almond antique chartreuse khaki white 1 1 17 17
Manufacturer#3 almond antique forest lavender goldenrod 1 1 14 14
Manufacturer#3 almond antique metallic orange dim 1 1 19 19
Manufacturer#3 almond antique misty red olive 1 1 1 1
Manufacturer#3 almond antique olive coral navajo 1 1 45 45
Manufacturer#4 almond antique gainsboro frosted violet 1 1 10 10
Manufacturer#4 almond antique violet mint lemon 1 1 39 39
Manufacturer#4 almond aquamarine floral ivory bisque 1 1 27 27
Manufacturer#4 almond aquamarine yellow dodger mint 1 1 7 7
Manufacturer#4 almond azure aquamarine papaya violet 1 1 12 12
Manufacturer#5 almond antique blue firebrick mint 1 1 31 31
Manufacturer#5 almond antique medium spring khaki 1 1 6 6
Manufacturer#5 almond antique sky peru orange 1 1 2 2
Manufacturer#5 almond aquamarine dodger light gainsboro 1 1 46 46
Manufacturer#5 almond azure blanched chiffon midnight 1 1 23 23
PREHOOK: query: explain vectorization detail
select p_mfgr, p_name,
rank() over (partition by p_mfgr order by p_name) as r,
dense_rank() over (partition by p_mfgr order by p_name) as dr,
p_size, sum(p_size) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1
from noop(on
noop(on
noop(on
noop(on part_orc
partition by p_mfgr
order by p_mfgr)
)
partition by p_mfgr,p_name
order by p_mfgr,p_name)
partition by p_mfgr
order by p_mfgr )
PREHOOK: type: QUERY
PREHOOK: Input: default@part_orc
#### A masked pattern was here ####
POSTHOOK: query: explain vectorization detail
select p_mfgr, p_name,
rank() over (partition by p_mfgr order by p_name) as r,
dense_rank() over (partition by p_mfgr order by p_name) as dr,
p_size, sum(p_size) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1
from noop(on
noop(on
noop(on
noop(on part_orc
partition by p_mfgr
order by p_mfgr)
)
partition by p_mfgr,p_name
order by p_mfgr,p_name)
partition by p_mfgr
order by p_mfgr )
POSTHOOK: type: QUERY
POSTHOOK: Input: default@part_orc
#### A masked pattern was here ####
PLAN VECTORIZATION:
enabled: true
enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
STAGE PLANS:
Stage: Stage-1
Tez
#### A masked pattern was here ####
Edges:
Reducer 2 <- Map 1 (SIMPLE_EDGE)
Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
Reducer 4 <- Reducer 3 (SIMPLE_EDGE)
Reducer 5 <- Reducer 4 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
TableScan
alias: part_orc
Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
TableScan Vectorization:
native: true
vectorizationSchemaColumns: [0:p_partkey:int, 1:p_name:string, 2:p_mfgr:string, 3:p_brand:string, 4:p_type:string, 5:p_size:int, 6:p_container:string, 7:p_retailprice:double, 8:p_comment:string, 9:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>, 10:ROW__IS__DELETED:boolean]
Reduce Output Operator
key expressions: p_mfgr (type: string)
null sort order: z
sort order: +
Map-reduce partition columns: p_mfgr (type: string)
Reduce Sink Vectorization:
className: VectorReduceSinkStringOperator
keyColumns: 2:string
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
valueColumns: 1:string, 5:int
Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: p_name (type: string), p_size (type: int)
Execution mode: vectorized, llap
LLAP IO: all inputs
Map Vectorization:
enabled: true
enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
inputFormatFeatureSupport: [DECIMAL_64]
featureSupportInUse: [DECIMAL_64]
inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
allNative: true
usesVectorUDFAdaptor: false
vectorized: true
rowBatchContext:
dataColumnCount: 9
includeColumns: [1, 2, 5]
dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string
partitionColumnCount: 0
scratchColumnTypeNames: []
Reducer 2
Execution mode: llap
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez] IS true
notVectorizedReason: PTF operator: NOOP not supported
vectorized: false
Reduce Operator Tree:
Select Operator
expressions: VALUE._col1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col4 (type: int)
outputColumnNames: _col1, _col2, _col5
Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
PTF Operator
Function definitions:
Input definition
input alias: part_orc
output shape: _col1: string, _col2: string, _col5: int
type: TABLE
Partition table definition
input alias: ptf_1
name: noop
order by: _col2 ASC NULLS LAST
output shape: _col1: string, _col2: string, _col5: int
partition by: _col2
raw input shape:
Partition table definition
input alias: ptf_2
name: noop
order by: _col2 ASC NULLS LAST
output shape: _col1: string, _col2: string, _col5: int
partition by: _col2
raw input shape:
Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col2 (type: string), _col1 (type: string)
null sort order: zz
sort order: ++
Map-reduce partition columns: _col2 (type: string), _col1 (type: string)
Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col5 (type: int)
Reducer 3
Execution mode: llap
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez] IS true
notVectorizedReason: PTF operator: NOOP not supported
vectorized: false
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int)
outputColumnNames: _col1, _col2, _col5
Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
PTF Operator
Function definitions:
Input definition
input alias: ptf_0
output shape: _col1: string, _col2: string, _col5: int
type: PTFCOMPONENT
Partition table definition
input alias: ptf_1
name: noop
order by: _col2 ASC NULLS LAST, _col1 ASC NULLS LAST
output shape: _col1: string, _col2: string, _col5: int
partition by: _col2, _col1
raw input shape:
Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col2 (type: string)
null sort order: z
sort order: +
Map-reduce partition columns: _col2 (type: string)
Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: string), _col5 (type: int)
Reducer 4
Execution mode: llap
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez] IS true
notVectorizedReason: PTF operator: NOOP not supported
vectorized: false
Reduce Operator Tree:
Select Operator
expressions: VALUE._col1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col4 (type: int)
outputColumnNames: _col1, _col2, _col5
Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
PTF Operator
Function definitions:
Input definition
input alias: ptf_0
output shape: _col1: string, _col2: string, _col5: int
type: PTFCOMPONENT
Partition table definition
input alias: ptf_1
name: noop
order by: _col2 ASC NULLS LAST
output shape: _col1: string, _col2: string, _col5: int
partition by: _col2
raw input shape:
Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col2 (type: string), _col1 (type: string)
null sort order: az
sort order: ++
Map-reduce partition columns: _col2 (type: string)
Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col5 (type: int)
Reducer 5
Execution mode: vectorized, llap
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez] IS true
reduceColumnNullOrder: az
reduceColumnSortOrder: ++
allNative: false
usesVectorUDFAdaptor: false
vectorized: true
rowBatchContext:
dataColumnCount: 3
dataColumns: KEY.reducesinkkey0:string, KEY.reducesinkkey1:string, VALUE._col3:int
partitionColumnCount: 0
scratchColumnTypeNames: [bigint, bigint, bigint]
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int)
outputColumnNames: _col1, _col2, _col5
Select Vectorization:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [1, 0, 2]
Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
PTF Operator
Function definitions:
Input definition
input alias: ptf_0
output shape: _col1: string, _col2: string, _col5: int
type: WINDOWING
Windowing table definition
input alias: ptf_1
name: windowingtablefunction
order by: _col1 ASC NULLS LAST
partition by: _col2
raw input shape:
window functions:
window function definition
alias: rank_window_0
arguments: _col1
name: rank
window function: GenericUDAFRankEvaluator
window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
alias: dense_rank_window_1
arguments: _col1
name: dense_rank
window function: GenericUDAFDenseRankEvaluator
window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
alias: sum_window_2
arguments: _col5
name: sum
window function: GenericUDAFSumLong
window frame: ROWS PRECEDING(MAX)~CURRENT
PTF Vectorization:
allEvaluatorsAreStreaming: true
className: VectorPTFOperator
evaluatorClasses: [VectorPTFEvaluatorRank, VectorPTFEvaluatorDenseRank, VectorPTFEvaluatorStreamingLongSum]
functionInputExpressions: [col 1:string, col 1:string, col 2:int]
functionNames: [rank, dense_rank, sum]
keyInputColumns: [1, 0]
native: true
nonKeyInputColumns: [2]
orderExpressions: [col 1:string]
outputColumns: [3, 4, 5, 1, 0, 2]
outputTypes: [int, int, bigint, string, string, int]
partitionExpressions: [col 0:string]
streamingColumns: [3, 4, 5]
Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
Select Vectorization:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [0, 1, 3, 4, 2, 5]
Statistics: Num rows: 26 Data size: 6214 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
Statistics: Num rows: 26 Data size: 6214 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
Fetch Operator
limit: -1
Processor Tree:
ListSink
PREHOOK: query: select p_mfgr, p_name,
rank() over (partition by p_mfgr order by p_name) as r,
dense_rank() over (partition by p_mfgr order by p_name) as dr,
p_size, sum(p_size) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1
from noop(on
noop(on
noop(on
noop(on part_orc
partition by p_mfgr
order by p_mfgr)
)
partition by p_mfgr,p_name
order by p_mfgr,p_name)
partition by p_mfgr
order by p_mfgr )
PREHOOK: type: QUERY
PREHOOK: Input: default@part_orc
#### A masked pattern was here ####
POSTHOOK: query: select p_mfgr, p_name,
rank() over (partition by p_mfgr order by p_name) as r,
dense_rank() over (partition by p_mfgr order by p_name) as dr,
p_size, sum(p_size) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1
from noop(on
noop(on
noop(on
noop(on part_orc
partition by p_mfgr
order by p_mfgr)
)
partition by p_mfgr,p_name
order by p_mfgr,p_name)
partition by p_mfgr
order by p_mfgr )
POSTHOOK: type: QUERY
POSTHOOK: Input: default@part_orc
#### A masked pattern was here ####
Manufacturer#1 almond antique burnished rose metallic 1 1 2 2
Manufacturer#1 almond antique burnished rose metallic 1 1 2 4
Manufacturer#1 almond antique chartreuse lavender yellow 3 2 34 38
Manufacturer#1 almond antique salmon chartreuse burlywood 4 3 6 44
Manufacturer#1 almond aquamarine burnished black steel 5 4 28 72
Manufacturer#1 almond aquamarine pink moccasin thistle 6 5 42 114
Manufacturer#2 almond antique violet chocolate turquoise 1 1 14 14
Manufacturer#2 almond antique violet turquoise frosted 2 2 40 54
Manufacturer#2 almond aquamarine midnight light salmon 3 3 2 56
Manufacturer#2 almond aquamarine rose maroon antique 4 4 25 81
Manufacturer#2 almond aquamarine sandy cyan gainsboro 5 5 18 99
Manufacturer#3 almond antique chartreuse khaki white 1 1 17 17
Manufacturer#3 almond antique forest lavender goldenrod 2 2 14 31
Manufacturer#3 almond antique metallic orange dim 3 3 19 50
Manufacturer#3 almond antique misty red olive 4 4 1 51
Manufacturer#3 almond antique olive coral navajo 5 5 45 96
Manufacturer#4 almond antique gainsboro frosted violet 1 1 10 10
Manufacturer#4 almond antique violet mint lemon 2 2 39 49
Manufacturer#4 almond aquamarine floral ivory bisque 3 3 27 76
Manufacturer#4 almond aquamarine yellow dodger mint 4 4 7 83
Manufacturer#4 almond azure aquamarine papaya violet 5 5 12 95
Manufacturer#5 almond antique blue firebrick mint 1 1 31 31
Manufacturer#5 almond antique medium spring khaki 2 2 6 37
Manufacturer#5 almond antique sky peru orange 3 3 2 39
Manufacturer#5 almond aquamarine dodger light gainsboro 4 4 46 85
Manufacturer#5 almond azure blanched chiffon midnight 5 5 23 108
PREHOOK: query: explain vectorization detail
select p_mfgr, p_name,
rank() over (partition by p_mfgr order by p_name) as r,
dense_rank() over (partition by p_mfgr order by p_name) as dr,
p_size, sum(p_size) over (partition by p_mfgr order by p_name) as s1
from noop(on
noop(on
noop(on
noop(on part_orc
partition by p_mfgr,p_name
order by p_mfgr,p_name)
)
partition by p_mfgr
order by p_mfgr))
PREHOOK: type: QUERY
PREHOOK: Input: default@part_orc
#### A masked pattern was here ####
POSTHOOK: query: explain vectorization detail
select p_mfgr, p_name,
rank() over (partition by p_mfgr order by p_name) as r,
dense_rank() over (partition by p_mfgr order by p_name) as dr,
p_size, sum(p_size) over (partition by p_mfgr order by p_name) as s1
from noop(on
noop(on
noop(on
noop(on part_orc
partition by p_mfgr,p_name
order by p_mfgr,p_name)
)
partition by p_mfgr
order by p_mfgr))
POSTHOOK: type: QUERY
POSTHOOK: Input: default@part_orc
#### A masked pattern was here ####
PLAN VECTORIZATION:
enabled: true
enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
STAGE PLANS:
Stage: Stage-1
Tez
#### A masked pattern was here ####
Edges:
Reducer 2 <- Map 1 (SIMPLE_EDGE)
Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
Reducer 4 <- Reducer 3 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
TableScan
alias: part_orc
Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
TableScan Vectorization:
native: true
vectorizationSchemaColumns: [0:p_partkey:int, 1:p_name:string, 2:p_mfgr:string, 3:p_brand:string, 4:p_type:string, 5:p_size:int, 6:p_container:string, 7:p_retailprice:double, 8:p_comment:string, 9:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>, 10:ROW__IS__DELETED:boolean]
Reduce Output Operator
key expressions: p_mfgr (type: string), p_name (type: string)
null sort order: zz
sort order: ++
Map-reduce partition columns: p_mfgr (type: string), p_name (type: string)
Reduce Sink Vectorization:
className: VectorReduceSinkMultiKeyOperator
keyColumns: 2:string, 1:string
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
valueColumns: 5:int
Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: p_size (type: int)
Execution mode: vectorized, llap
LLAP IO: all inputs
Map Vectorization:
enabled: true
enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
inputFormatFeatureSupport: [DECIMAL_64]
featureSupportInUse: [DECIMAL_64]
inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
allNative: true
usesVectorUDFAdaptor: false
vectorized: true
rowBatchContext:
dataColumnCount: 9
includeColumns: [1, 2, 5]
dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string
partitionColumnCount: 0
scratchColumnTypeNames: []
Reducer 2
Execution mode: llap
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez] IS true
notVectorizedReason: PTF operator: NOOP not supported
vectorized: false
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int)
outputColumnNames: _col1, _col2, _col5
Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
PTF Operator
Function definitions:
Input definition
input alias: part_orc
output shape: _col1: string, _col2: string, _col5: int
type: TABLE
Partition table definition
input alias: ptf_1
name: noop
order by: _col2 ASC NULLS LAST, _col1 ASC NULLS LAST
output shape: _col1: string, _col2: string, _col5: int
partition by: _col2, _col1
raw input shape:
Partition table definition
input alias: ptf_2
name: noop
order by: _col2 ASC NULLS LAST, _col1 ASC NULLS LAST
output shape: _col1: string, _col2: string, _col5: int
partition by: _col2, _col1
raw input shape:
Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col2 (type: string)
null sort order: z
sort order: +
Map-reduce partition columns: _col2 (type: string)
Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: string), _col5 (type: int)
Reducer 3
Execution mode: llap
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez] IS true
notVectorizedReason: PTF operator: NOOP not supported
vectorized: false
Reduce Operator Tree:
Select Operator
expressions: VALUE._col1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col4 (type: int)
outputColumnNames: _col1, _col2, _col5
Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
PTF Operator
Function definitions:
Input definition
input alias: ptf_0
output shape: _col1: string, _col2: string, _col5: int
type: PTFCOMPONENT
Partition table definition
input alias: ptf_1
name: noop
order by: _col2 ASC NULLS LAST
output shape: _col1: string, _col2: string, _col5: int
partition by: _col2
raw input shape:
Partition table definition
input alias: ptf_2
name: noop
order by: _col2 ASC NULLS LAST
output shape: _col1: string, _col2: string, _col5: int
partition by: _col2
raw input shape:
Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col2 (type: string), _col1 (type: string)
null sort order: az
sort order: ++
Map-reduce partition columns: _col2 (type: string)
Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col5 (type: int)
Reducer 4
Execution mode: vectorized, llap
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez] IS true
reduceColumnNullOrder: az
reduceColumnSortOrder: ++
allNative: false
usesVectorUDFAdaptor: false
vectorized: true
rowBatchContext:
dataColumnCount: 3
dataColumns: KEY.reducesinkkey0:string, KEY.reducesinkkey1:string, VALUE._col3:int
partitionColumnCount: 0
scratchColumnTypeNames: [bigint, bigint, bigint]
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int)
outputColumnNames: _col1, _col2, _col5
Select Vectorization:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [1, 0, 2]
Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
PTF Operator
Function definitions:
Input definition
input alias: ptf_0
output shape: _col1: string, _col2: string, _col5: int
type: WINDOWING
Windowing table definition
input alias: ptf_1
name: windowingtablefunction
order by: _col1 ASC NULLS LAST
partition by: _col2
raw input shape:
window functions:
window function definition
alias: rank_window_0
arguments: _col1
name: rank
window function: GenericUDAFRankEvaluator
window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
alias: dense_rank_window_1
arguments: _col1
name: dense_rank
window function: GenericUDAFDenseRankEvaluator
window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
alias: sum_window_2
arguments: _col5
name: sum
window function: GenericUDAFSumLong
window frame: RANGE PRECEDING(MAX)~CURRENT
PTF Vectorization:
allEvaluatorsAreStreaming: false
className: VectorPTFOperator
evaluatorClasses: [VectorPTFEvaluatorRank, VectorPTFEvaluatorDenseRank, VectorPTFEvaluatorLongSum]
functionInputExpressions: [col 1:string, col 1:string, col 2:int]
functionNames: [rank, dense_rank, sum]
keyInputColumns: [1, 0]
native: true
nonKeyInputColumns: [2]
orderExpressions: [col 1:string]
outputColumns: [3, 4, 5, 1, 0, 2]
outputTypes: [int, int, bigint, string, string, int]
partitionExpressions: [col 0:string]
streamingColumns: [3, 4]
Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
Select Vectorization:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [0, 1, 3, 4, 2, 5]
Statistics: Num rows: 26 Data size: 6214 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
Statistics: Num rows: 26 Data size: 6214 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
Fetch Operator
limit: -1
Processor Tree:
ListSink
PREHOOK: query: select p_mfgr, p_name,
rank() over (partition by p_mfgr order by p_name) as r,
dense_rank() over (partition by p_mfgr order by p_name) as dr,
p_size, sum(p_size) over (partition by p_mfgr order by p_name) as s1
from noop(on
noop(on
noop(on
noop(on part_orc
partition by p_mfgr,p_name
order by p_mfgr,p_name)
)
partition by p_mfgr
order by p_mfgr))
PREHOOK: type: QUERY
PREHOOK: Input: default@part_orc
#### A masked pattern was here ####
POSTHOOK: query: select p_mfgr, p_name,
rank() over (partition by p_mfgr order by p_name) as r,
dense_rank() over (partition by p_mfgr order by p_name) as dr,
p_size, sum(p_size) over (partition by p_mfgr order by p_name) as s1
from noop(on
noop(on
noop(on
noop(on part_orc
partition by p_mfgr,p_name
order by p_mfgr,p_name)
)
partition by p_mfgr
order by p_mfgr))
POSTHOOK: type: QUERY
POSTHOOK: Input: default@part_orc
#### A masked pattern was here ####
Manufacturer#1 almond antique burnished rose metallic 1 1 2 4
Manufacturer#1 almond antique burnished rose metallic 1 1 2 4
Manufacturer#1 almond antique chartreuse lavender yellow 3 2 34 38
Manufacturer#1 almond antique salmon chartreuse burlywood 4 3 6 44
Manufacturer#1 almond aquamarine burnished black steel 5 4 28 72
Manufacturer#1 almond aquamarine pink moccasin thistle 6 5 42 114
Manufacturer#2 almond antique violet chocolate turquoise 1 1 14 14
Manufacturer#2 almond antique violet turquoise frosted 2 2 40 54
Manufacturer#2 almond aquamarine midnight light salmon 3 3 2 56
Manufacturer#2 almond aquamarine rose maroon antique 4 4 25 81
Manufacturer#2 almond aquamarine sandy cyan gainsboro 5 5 18 99
Manufacturer#3 almond antique chartreuse khaki white 1 1 17 17
Manufacturer#3 almond antique forest lavender goldenrod 2 2 14 31
Manufacturer#3 almond antique metallic orange dim 3 3 19 50
Manufacturer#3 almond antique misty red olive 4 4 1 51
Manufacturer#3 almond antique olive coral navajo 5 5 45 96
Manufacturer#4 almond antique gainsboro frosted violet 1 1 10 10
Manufacturer#4 almond antique violet mint lemon 2 2 39 49
Manufacturer#4 almond aquamarine floral ivory bisque 3 3 27 76
Manufacturer#4 almond aquamarine yellow dodger mint 4 4 7 83
Manufacturer#4 almond azure aquamarine papaya violet 5 5 12 95
Manufacturer#5 almond antique blue firebrick mint 1 1 31 31
Manufacturer#5 almond antique medium spring khaki 2 2 6 37
Manufacturer#5 almond antique sky peru orange 3 3 2 39
Manufacturer#5 almond aquamarine dodger light gainsboro 4 4 46 85
Manufacturer#5 almond azure blanched chiffon midnight 5 5 23 108
PREHOOK: query: explain vectorization detail
select p_mfgr, p_name,
rank() over (partition by p_mfgr,p_name) as r,
dense_rank() over (partition by p_mfgr,p_name) as dr,
p_size, sum(p_size) over (partition by p_mfgr,p_name rows between unbounded preceding and current row) as s1
from noopwithmap(on
noop(on
noop(on
noop(on part_orc
partition by p_mfgr,p_name
order by p_mfgr,p_name)
)
partition by p_mfgr
order by p_mfgr)
partition by p_mfgr,p_name
order by p_mfgr,p_name)
PREHOOK: type: QUERY
PREHOOK: Input: default@part_orc
#### A masked pattern was here ####
POSTHOOK: query: explain vectorization detail
select p_mfgr, p_name,
rank() over (partition by p_mfgr,p_name) as r,
dense_rank() over (partition by p_mfgr,p_name) as dr,
p_size, sum(p_size) over (partition by p_mfgr,p_name rows between unbounded preceding and current row) as s1
from noopwithmap(on
noop(on
noop(on
noop(on part_orc
partition by p_mfgr,p_name
order by p_mfgr,p_name)
)
partition by p_mfgr
order by p_mfgr)
partition by p_mfgr,p_name
order by p_mfgr,p_name)
POSTHOOK: type: QUERY
POSTHOOK: Input: default@part_orc
#### A masked pattern was here ####
PLAN VECTORIZATION:
enabled: true
enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
STAGE PLANS:
Stage: Stage-1
Tez
#### A masked pattern was here ####
Edges:
Reducer 2 <- Map 1 (SIMPLE_EDGE)
Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
Reducer 4 <- Reducer 3 (SIMPLE_EDGE)
Reducer 5 <- Reducer 4 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
TableScan
alias: part_orc
Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
TableScan Vectorization:
native: true
vectorizationSchemaColumns: [0:p_partkey:int, 1:p_name:string, 2:p_mfgr:string, 3:p_brand:string, 4:p_type:string, 5:p_size:int, 6:p_container:string, 7:p_retailprice:double, 8:p_comment:string, 9:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>, 10:ROW__IS__DELETED:boolean]
Reduce Output Operator
key expressions: p_mfgr (type: string), p_name (type: string)
null sort order: zz
sort order: ++
Map-reduce partition columns: p_mfgr (type: string), p_name (type: string)
Reduce Sink Vectorization:
className: VectorReduceSinkMultiKeyOperator
keyColumns: 2:string, 1:string
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
valueColumns: 5:int
Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: p_size (type: int)
Execution mode: vectorized, llap
LLAP IO: all inputs
Map Vectorization:
enabled: true
enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
inputFormatFeatureSupport: [DECIMAL_64]
featureSupportInUse: [DECIMAL_64]
inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
allNative: true
usesVectorUDFAdaptor: false
vectorized: true
rowBatchContext:
dataColumnCount: 9
includeColumns: [1, 2, 5]
dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string
partitionColumnCount: 0
scratchColumnTypeNames: []
Reducer 2
Execution mode: llap
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez] IS true
notVectorizedReason: PTF operator: NOOP not supported
vectorized: false
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int)
outputColumnNames: _col1, _col2, _col5
Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
PTF Operator
Function definitions:
Input definition
input alias: part_orc
output shape: _col1: string, _col2: string, _col5: int
type: TABLE
Partition table definition
input alias: ptf_1
name: noop
order by: _col2 ASC NULLS LAST, _col1 ASC NULLS LAST
output shape: _col1: string, _col2: string, _col5: int
partition by: _col2, _col1
raw input shape:
Partition table definition
input alias: ptf_2
name: noop
order by: _col2 ASC NULLS LAST, _col1 ASC NULLS LAST
output shape: _col1: string, _col2: string, _col5: int
partition by: _col2, _col1
raw input shape:
Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col2 (type: string)
null sort order: z
sort order: +
Map-reduce partition columns: _col2 (type: string)
Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: string), _col5 (type: int)
Reducer 3
Execution mode: llap
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez] IS true
notVectorizedReason: PTF operator: NOOP not supported
vectorized: false
Reduce Operator Tree:
Select Operator
expressions: VALUE._col1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col4 (type: int)
outputColumnNames: _col1, _col2, _col5
Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
PTF Operator
Function definitions:
Input definition
input alias: ptf_0
output shape: _col1: string, _col2: string, _col5: int
type: PTFCOMPONENT
Partition table definition
input alias: ptf_1
name: noop
order by: _col2 ASC NULLS LAST
output shape: _col1: string, _col2: string, _col5: int
partition by: _col2
raw input shape:
Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
PTF Operator
Function definitions:
Input definition
input alias: ptf_0
output shape: _col1: string, _col2: string, _col5: int
type: PTFCOMPONENT
Partition table definition
input alias: ptf_1
name: noopwithmap
order by: _col2 ASC NULLS LAST, _col1 ASC NULLS LAST
output shape: _col1: string, _col2: string, _col5: int
partition by: _col2, _col1
raw input shape:
transforms raw input: true
Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
Map-side function: true
Reduce Output Operator
key expressions: _col2 (type: string), _col1 (type: string)
null sort order: zz
sort order: ++
Map-reduce partition columns: _col2 (type: string), _col1 (type: string)
Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col5 (type: int)
Reducer 4
Execution mode: llap
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez] IS true
notVectorizedReason: PTF operator: NOOP not supported
vectorized: false
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int)
outputColumnNames: _col1, _col2, _col5
Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
PTF Operator
Function definitions:
Input definition
input alias: ptf_0
output shape: _col1: string, _col2: string, _col5: int
type: PTFCOMPONENT
Partition table definition
input alias: ptf_1
name: noopwithmap
order by: _col2 ASC NULLS LAST, _col1 ASC NULLS LAST
output shape: _col1: string, _col2: string, _col5: int
partition by: _col2, _col1
raw input shape:
transforms raw input: true
Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col2 (type: string), _col1 (type: string)
null sort order: aa
sort order: ++
Map-reduce partition columns: _col2 (type: string), _col1 (type: string)
Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col5 (type: int)
Reducer 5
Execution mode: vectorized, llap
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez] IS true
reduceColumnNullOrder: aa
reduceColumnSortOrder: ++
allNative: false
usesVectorUDFAdaptor: false
vectorized: true
rowBatchContext:
dataColumnCount: 3
dataColumns: KEY.reducesinkkey0:string, KEY.reducesinkkey1:string, VALUE._col3:int
partitionColumnCount: 0
scratchColumnTypeNames: [bigint, bigint, bigint]
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int)
outputColumnNames: _col1, _col2, _col5
Select Vectorization:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [1, 0, 2]
Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
PTF Operator
Function definitions:
Input definition
input alias: ptf_0
output shape: _col1: string, _col2: string, _col5: int
type: WINDOWING
Windowing table definition
input alias: ptf_1
name: windowingtablefunction
order by: _col2 ASC NULLS FIRST, _col1 ASC NULLS FIRST
partition by: _col2, _col1
raw input shape:
window functions:
window function definition
alias: rank_window_0
arguments: _col2, _col1
name: rank
window function: GenericUDAFRankEvaluator
window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
alias: dense_rank_window_1
arguments: _col2, _col1
name: dense_rank
window function: GenericUDAFDenseRankEvaluator
window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
alias: sum_window_2
arguments: _col5
name: sum
window function: GenericUDAFSumLong
window frame: ROWS PRECEDING(MAX)~CURRENT
PTF Vectorization:
allEvaluatorsAreStreaming: true
className: VectorPTFOperator
evaluatorClasses: [VectorPTFEvaluatorRank, VectorPTFEvaluatorDenseRank, VectorPTFEvaluatorStreamingLongSum]
functionInputExpressions: [col 0:string, col 0:string, col 2:int]
functionNames: [rank, dense_rank, sum]
keyInputColumns: [1, 0]
native: true
nonKeyInputColumns: [2]
orderExpressions: [col 0:string, col 1:string]
outputColumns: [3, 4, 5, 1, 0, 2]
outputTypes: [int, int, bigint, string, string, int]
partitionExpressions: [col 0:string, col 1:string]
streamingColumns: [3, 4, 5]
Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
Select Vectorization:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [0, 1, 3, 4, 2, 5]
Statistics: Num rows: 26 Data size: 6214 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
Statistics: Num rows: 26 Data size: 6214 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
Fetch Operator
limit: -1
Processor Tree:
ListSink
PREHOOK: query: select p_mfgr, p_name,
rank() over (partition by p_mfgr,p_name) as r,
dense_rank() over (partition by p_mfgr,p_name) as dr,
p_size, sum(p_size) over (partition by p_mfgr,p_name rows between unbounded preceding and current row) as s1
from noopwithmap(on
noop(on
noop(on
noop(on part_orc
partition by p_mfgr,p_name
order by p_mfgr,p_name)
)
partition by p_mfgr
order by p_mfgr)
partition by p_mfgr,p_name
order by p_mfgr,p_name)
PREHOOK: type: QUERY
PREHOOK: Input: default@part_orc
#### A masked pattern was here ####
POSTHOOK: query: select p_mfgr, p_name,
rank() over (partition by p_mfgr,p_name) as r,
dense_rank() over (partition by p_mfgr,p_name) as dr,
p_size, sum(p_size) over (partition by p_mfgr,p_name rows between unbounded preceding and current row) as s1
from noopwithmap(on
noop(on
noop(on
noop(on part_orc
partition by p_mfgr,p_name
order by p_mfgr,p_name)
)
partition by p_mfgr
order by p_mfgr)
partition by p_mfgr,p_name
order by p_mfgr,p_name)
POSTHOOK: type: QUERY
POSTHOOK: Input: default@part_orc
#### A masked pattern was here ####
Manufacturer#1 almond antique burnished rose metallic 1 1 2 2
Manufacturer#1 almond antique burnished rose metallic 1 1 2 4
Manufacturer#1 almond antique chartreuse lavender yellow 1 1 34 34
Manufacturer#1 almond antique salmon chartreuse burlywood 1 1 6 6
Manufacturer#1 almond aquamarine burnished black steel 1 1 28 28
Manufacturer#1 almond aquamarine pink moccasin thistle 1 1 42 42
Manufacturer#2 almond antique violet chocolate turquoise 1 1 14 14
Manufacturer#2 almond antique violet turquoise frosted 1 1 40 40
Manufacturer#2 almond aquamarine midnight light salmon 1 1 2 2
Manufacturer#2 almond aquamarine rose maroon antique 1 1 25 25
Manufacturer#2 almond aquamarine sandy cyan gainsboro 1 1 18 18
Manufacturer#3 almond antique chartreuse khaki white 1 1 17 17
Manufacturer#3 almond antique forest lavender goldenrod 1 1 14 14
Manufacturer#3 almond antique metallic orange dim 1 1 19 19
Manufacturer#3 almond antique misty red olive 1 1 1 1
Manufacturer#3 almond antique olive coral navajo 1 1 45 45
Manufacturer#4 almond antique gainsboro frosted violet 1 1 10 10
Manufacturer#4 almond antique violet mint lemon 1 1 39 39
Manufacturer#4 almond aquamarine floral ivory bisque 1 1 27 27
Manufacturer#4 almond aquamarine yellow dodger mint 1 1 7 7
Manufacturer#4 almond azure aquamarine papaya violet 1 1 12 12
Manufacturer#5 almond antique blue firebrick mint 1 1 31 31
Manufacturer#5 almond antique medium spring khaki 1 1 6 6
Manufacturer#5 almond antique sky peru orange 1 1 2 2
Manufacturer#5 almond aquamarine dodger light gainsboro 1 1 46 46
Manufacturer#5 almond azure blanched chiffon midnight 1 1 23 23
PREHOOK: query: explain vectorization detail
select p_mfgr, p_name,
rank() over (partition by p_mfgr,p_name order by p_mfgr,p_name) as r,
dense_rank() over (partition by p_mfgr,p_name order by p_mfgr,p_name) as dr,
p_size,
sum(p_size) over (partition by p_mfgr,p_name order by p_mfgr,p_name rows between unbounded preceding and current row) as s1,
sum(p_size) over (partition by p_mfgr,p_name order by p_mfgr,p_name rows between unbounded preceding and current row) as s2
from noop(on
noopwithmap(on
noop(on part_orc
partition by p_mfgr, p_name
order by p_mfgr, p_name)
partition by p_mfgr
order by p_mfgr
))
PREHOOK: type: QUERY
PREHOOK: Input: default@part_orc
#### A masked pattern was here ####
POSTHOOK: query: explain vectorization detail
select p_mfgr, p_name,
rank() over (partition by p_mfgr,p_name order by p_mfgr,p_name) as r,
dense_rank() over (partition by p_mfgr,p_name order by p_mfgr,p_name) as dr,
p_size,
sum(p_size) over (partition by p_mfgr,p_name order by p_mfgr,p_name rows between unbounded preceding and current row) as s1,
sum(p_size) over (partition by p_mfgr,p_name order by p_mfgr,p_name rows between unbounded preceding and current row) as s2
from noop(on
noopwithmap(on
noop(on part_orc
partition by p_mfgr, p_name
order by p_mfgr, p_name)
partition by p_mfgr
order by p_mfgr
))
POSTHOOK: type: QUERY
POSTHOOK: Input: default@part_orc
#### A masked pattern was here ####
PLAN VECTORIZATION:
enabled: true
enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
STAGE PLANS:
Stage: Stage-1
Tez
#### A masked pattern was here ####
Edges:
Reducer 2 <- Map 1 (SIMPLE_EDGE)
Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
Reducer 4 <- Reducer 3 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
TableScan
alias: part_orc
Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
TableScan Vectorization:
native: true
vectorizationSchemaColumns: [0:p_partkey:int, 1:p_name:string, 2:p_mfgr:string, 3:p_brand:string, 4:p_type:string, 5:p_size:int, 6:p_container:string, 7:p_retailprice:double, 8:p_comment:string, 9:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>, 10:ROW__IS__DELETED:boolean]
Reduce Output Operator
key expressions: p_mfgr (type: string), p_name (type: string)
null sort order: zz
sort order: ++
Map-reduce partition columns: p_mfgr (type: string), p_name (type: string)
Reduce Sink Vectorization:
className: VectorReduceSinkMultiKeyOperator
keyColumns: 2:string, 1:string
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
valueColumns: 5:int
Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: p_size (type: int)
Execution mode: vectorized, llap
LLAP IO: all inputs
Map Vectorization:
enabled: true
enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
inputFormatFeatureSupport: [DECIMAL_64]
featureSupportInUse: [DECIMAL_64]
inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
allNative: true
usesVectorUDFAdaptor: false
vectorized: true
rowBatchContext:
dataColumnCount: 9
includeColumns: [1, 2, 5]
dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string
partitionColumnCount: 0
scratchColumnTypeNames: []
Reducer 2
Execution mode: llap
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez] IS true
notVectorizedReason: PTF operator: NOOP not supported
vectorized: false
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int)
outputColumnNames: _col1, _col2, _col5
Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
PTF Operator
Function definitions:
Input definition
input alias: part_orc
output shape: _col1: string, _col2: string, _col5: int
type: TABLE
Partition table definition
input alias: ptf_1
name: noop
order by: _col2 ASC NULLS LAST, _col1 ASC NULLS LAST
output shape: _col1: string, _col2: string, _col5: int
partition by: _col2, _col1
raw input shape:
Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
PTF Operator
Function definitions:
Input definition
input alias: ptf_0
output shape: _col1: string, _col2: string, _col5: int
type: PTFCOMPONENT
Partition table definition
input alias: ptf_1
name: noopwithmap
order by: _col2 ASC NULLS LAST
output shape: _col1: string, _col2: string, _col5: int
partition by: _col2
raw input shape:
transforms raw input: true
Partition table definition
input alias: ptf_2
name: noop
order by: _col2 ASC NULLS LAST
output shape: _col1: string, _col2: string, _col5: int
partition by: _col2
raw input shape:
Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
Map-side function: true
Reduce Output Operator
key expressions: _col2 (type: string)
null sort order: z
sort order: +
Map-reduce partition columns: _col2 (type: string)
Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: string), _col5 (type: int)
Reducer 3
Execution mode: llap
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez] IS true
notVectorizedReason: PTF operator: NOOP not supported
vectorized: false
Reduce Operator Tree:
Select Operator
expressions: VALUE._col1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col4 (type: int)
outputColumnNames: _col1, _col2, _col5
Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
PTF Operator
Function definitions:
Input definition
input alias: ptf_0
output shape: _col1: string, _col2: string, _col5: int
type: PTFCOMPONENT
Partition table definition
input alias: ptf_1
name: noopwithmap
order by: _col2 ASC NULLS LAST
output shape: _col1: string, _col2: string, _col5: int
partition by: _col2
raw input shape:
transforms raw input: true
Partition table definition
input alias: ptf_2
name: noop
order by: _col2 ASC NULLS LAST
output shape: _col1: string, _col2: string, _col5: int
partition by: _col2
raw input shape:
Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col2 (type: string), _col1 (type: string)
null sort order: zz
sort order: ++
Map-reduce partition columns: _col2 (type: string), _col1 (type: string)
Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col5 (type: int)
Reducer 4
Execution mode: vectorized, llap
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez] IS true
reduceColumnNullOrder: zz
reduceColumnSortOrder: ++
allNative: false
usesVectorUDFAdaptor: false
vectorized: true
rowBatchContext:
dataColumnCount: 3
dataColumns: KEY.reducesinkkey0:string, KEY.reducesinkkey1:string, VALUE._col3:int
partitionColumnCount: 0
scratchColumnTypeNames: [bigint, bigint, bigint]
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int)
outputColumnNames: _col1, _col2, _col5
Select Vectorization:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [1, 0, 2]
Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
PTF Operator
Function definitions:
Input definition
input alias: ptf_0
output shape: _col1: string, _col2: string, _col5: int
type: WINDOWING
Windowing table definition
input alias: ptf_1
name: windowingtablefunction
order by: _col2 ASC NULLS LAST, _col1 ASC NULLS LAST
partition by: _col2, _col1
raw input shape:
window functions:
window function definition
alias: rank_window_0
arguments: _col2, _col1
name: rank
window function: GenericUDAFRankEvaluator
window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
alias: dense_rank_window_1
arguments: _col2, _col1
name: dense_rank
window function: GenericUDAFDenseRankEvaluator
window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
alias: sum_window_2
arguments: _col5
name: sum
window function: GenericUDAFSumLong
window frame: ROWS PRECEDING(MAX)~CURRENT
PTF Vectorization:
allEvaluatorsAreStreaming: true
className: VectorPTFOperator
evaluatorClasses: [VectorPTFEvaluatorRank, VectorPTFEvaluatorDenseRank, VectorPTFEvaluatorStreamingLongSum]
functionInputExpressions: [col 0:string, col 0:string, col 2:int]
functionNames: [rank, dense_rank, sum]
keyInputColumns: [1, 0]
native: true
nonKeyInputColumns: [2]
orderExpressions: [col 0:string, col 1:string]
outputColumns: [3, 4, 5, 1, 0, 2]
outputTypes: [int, int, bigint, string, string, int]
partitionExpressions: [col 0:string, col 1:string]
streamingColumns: [3, 4, 5]
Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint), sum_window_2 (type: bigint)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
Select Vectorization:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [0, 1, 3, 4, 2, 5, 5]
Statistics: Num rows: 26 Data size: 6422 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
Statistics: Num rows: 26 Data size: 6422 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
Fetch Operator
limit: -1
Processor Tree:
ListSink
PREHOOK: query: select p_mfgr, p_name,
rank() over (partition by p_mfgr,p_name order by p_mfgr,p_name) as r,
dense_rank() over (partition by p_mfgr,p_name order by p_mfgr,p_name) as dr,
p_size,
sum(p_size) over (partition by p_mfgr,p_name order by p_mfgr,p_name rows between unbounded preceding and current row) as s1,
sum(p_size) over (partition by p_mfgr,p_name order by p_mfgr,p_name rows between unbounded preceding and current row) as s2
from noop(on
noopwithmap(on
noop(on part_orc
partition by p_mfgr, p_name
order by p_mfgr, p_name)
partition by p_mfgr
order by p_mfgr
))
PREHOOK: type: QUERY
PREHOOK: Input: default@part_orc
#### A masked pattern was here ####
POSTHOOK: query: select p_mfgr, p_name,
rank() over (partition by p_mfgr,p_name order by p_mfgr,p_name) as r,
dense_rank() over (partition by p_mfgr,p_name order by p_mfgr,p_name) as dr,
p_size,
sum(p_size) over (partition by p_mfgr,p_name order by p_mfgr,p_name rows between unbounded preceding and current row) as s1,
sum(p_size) over (partition by p_mfgr,p_name order by p_mfgr,p_name rows between unbounded preceding and current row) as s2
from noop(on
noopwithmap(on
noop(on part_orc
partition by p_mfgr, p_name
order by p_mfgr, p_name)
partition by p_mfgr
order by p_mfgr
))
POSTHOOK: type: QUERY
POSTHOOK: Input: default@part_orc
#### A masked pattern was here ####
Manufacturer#1 almond antique burnished rose metallic 1 1 2 2 2
Manufacturer#1 almond antique burnished rose metallic 1 1 2 4 4
Manufacturer#1 almond antique chartreuse lavender yellow 1 1 34 34 34
Manufacturer#1 almond antique salmon chartreuse burlywood 1 1 6 6 6
Manufacturer#1 almond aquamarine burnished black steel 1 1 28 28 28
Manufacturer#1 almond aquamarine pink moccasin thistle 1 1 42 42 42
Manufacturer#2 almond antique violet chocolate turquoise 1 1 14 14 14
Manufacturer#2 almond antique violet turquoise frosted 1 1 40 40 40
Manufacturer#2 almond aquamarine midnight light salmon 1 1 2 2 2
Manufacturer#2 almond aquamarine rose maroon antique 1 1 25 25 25
Manufacturer#2 almond aquamarine sandy cyan gainsboro 1 1 18 18 18
Manufacturer#3 almond antique chartreuse khaki white 1 1 17 17 17
Manufacturer#3 almond antique forest lavender goldenrod 1 1 14 14 14
Manufacturer#3 almond antique metallic orange dim 1 1 19 19 19
Manufacturer#3 almond antique misty red olive 1 1 1 1 1
Manufacturer#3 almond antique olive coral navajo 1 1 45 45 45
Manufacturer#4 almond antique gainsboro frosted violet 1 1 10 10 10
Manufacturer#4 almond antique violet mint lemon 1 1 39 39 39
Manufacturer#4 almond aquamarine floral ivory bisque 1 1 27 27 27
Manufacturer#4 almond aquamarine yellow dodger mint 1 1 7 7 7
Manufacturer#4 almond azure aquamarine papaya violet 1 1 12 12 12
Manufacturer#5 almond antique blue firebrick mint 1 1 31 31 31
Manufacturer#5 almond antique medium spring khaki 1 1 6 6 6
Manufacturer#5 almond antique sky peru orange 1 1 2 2 2
Manufacturer#5 almond aquamarine dodger light gainsboro 1 1 46 46 46
Manufacturer#5 almond azure blanched chiffon midnight 1 1 23 23 23
PREHOOK: query: explain vectorization detail
select p_mfgr, p_name,
rank() over (partition by p_mfgr order by p_name) as r,
dense_rank() over (partition by p_mfgr order by p_name) as dr,
p_size,
sum(p_size) over (partition by p_mfgr order by p_name range between unbounded preceding and current row) as s1,
sum(p_size) over (partition by p_mfgr order by p_name range between unbounded preceding and current row) as s2
from noopwithmap(on
noop(on
noop(on part_orc
partition by p_mfgr, p_name
order by p_mfgr, p_name)
))
PREHOOK: type: QUERY
PREHOOK: Input: default@part_orc
#### A masked pattern was here ####
POSTHOOK: query: explain vectorization detail
select p_mfgr, p_name,
rank() over (partition by p_mfgr order by p_name) as r,
dense_rank() over (partition by p_mfgr order by p_name) as dr,
p_size,
sum(p_size) over (partition by p_mfgr order by p_name range between unbounded preceding and current row) as s1,
sum(p_size) over (partition by p_mfgr order by p_name range between unbounded preceding and current row) as s2
from noopwithmap(on
noop(on
noop(on part_orc
partition by p_mfgr, p_name
order by p_mfgr, p_name)
))
POSTHOOK: type: QUERY
POSTHOOK: Input: default@part_orc
#### A masked pattern was here ####
PLAN VECTORIZATION:
enabled: true
enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
STAGE PLANS:
Stage: Stage-1
Tez
#### A masked pattern was here ####
Edges:
Reducer 2 <- Map 1 (SIMPLE_EDGE)
Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
Reducer 4 <- Reducer 3 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
TableScan
alias: part_orc
Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
TableScan Vectorization:
native: true
vectorizationSchemaColumns: [0:p_partkey:int, 1:p_name:string, 2:p_mfgr:string, 3:p_brand:string, 4:p_type:string, 5:p_size:int, 6:p_container:string, 7:p_retailprice:double, 8:p_comment:string, 9:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>, 10:ROW__IS__DELETED:boolean]
Reduce Output Operator
key expressions: p_mfgr (type: string), p_name (type: string)
null sort order: zz
sort order: ++
Map-reduce partition columns: p_mfgr (type: string), p_name (type: string)
Reduce Sink Vectorization:
className: VectorReduceSinkMultiKeyOperator
keyColumns: 2:string, 1:string
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
valueColumns: 5:int
Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: p_size (type: int)
Execution mode: vectorized, llap
LLAP IO: all inputs
Map Vectorization:
enabled: true
enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
inputFormatFeatureSupport: [DECIMAL_64]
featureSupportInUse: [DECIMAL_64]
inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
allNative: true
usesVectorUDFAdaptor: false
vectorized: true
rowBatchContext:
dataColumnCount: 9
includeColumns: [1, 2, 5]
dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string
partitionColumnCount: 0
scratchColumnTypeNames: []
Reducer 2
Execution mode: llap
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez] IS true
notVectorizedReason: PTF operator: NOOP not supported
vectorized: false
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int)
outputColumnNames: _col1, _col2, _col5
Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
PTF Operator
Function definitions:
Input definition
input alias: part_orc
output shape: _col1: string, _col2: string, _col5: int
type: TABLE
Partition table definition
input alias: ptf_1
name: noop
order by: _col2 ASC NULLS LAST, _col1 ASC NULLS LAST
output shape: _col1: string, _col2: string, _col5: int
partition by: _col2, _col1
raw input shape:
Partition table definition
input alias: ptf_2
name: noop
order by: _col2 ASC NULLS LAST, _col1 ASC NULLS LAST
output shape: _col1: string, _col2: string, _col5: int
partition by: _col2, _col1
raw input shape:
Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
PTF Operator
Function definitions:
Input definition
input alias: ptf_0
output shape: _col1: string, _col2: string, _col5: int
type: PTFCOMPONENT
Partition table definition
input alias: ptf_1
name: noopwithmap
order by: _col2 ASC NULLS LAST, _col1 ASC NULLS LAST
output shape: _col1: string, _col2: string, _col5: int
partition by: _col2, _col1
raw input shape:
transforms raw input: true
Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
Map-side function: true
Reduce Output Operator
key expressions: _col2 (type: string), _col1 (type: string)
null sort order: zz
sort order: ++
Map-reduce partition columns: _col2 (type: string), _col1 (type: string)
Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col5 (type: int)
Reducer 3
Execution mode: llap
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez] IS true
notVectorizedReason: PTF operator: NOOP not supported
vectorized: false
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int)
outputColumnNames: _col1, _col2, _col5
Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
PTF Operator
Function definitions:
Input definition
input alias: ptf_0
output shape: _col1: string, _col2: string, _col5: int
type: PTFCOMPONENT
Partition table definition
input alias: ptf_1
name: noopwithmap
order by: _col2 ASC NULLS LAST, _col1 ASC NULLS LAST
output shape: _col1: string, _col2: string, _col5: int
partition by: _col2, _col1
raw input shape:
transforms raw input: true
Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col2 (type: string), _col1 (type: string)
null sort order: az
sort order: ++
Map-reduce partition columns: _col2 (type: string)
Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col5 (type: int)
Reducer 4
Execution mode: vectorized, llap
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez] IS true
reduceColumnNullOrder: az
reduceColumnSortOrder: ++
allNative: false
usesVectorUDFAdaptor: false
vectorized: true
rowBatchContext:
dataColumnCount: 3
dataColumns: KEY.reducesinkkey0:string, KEY.reducesinkkey1:string, VALUE._col3:int
partitionColumnCount: 0
scratchColumnTypeNames: [bigint, bigint, bigint]
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int)
outputColumnNames: _col1, _col2, _col5
Select Vectorization:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [1, 0, 2]
Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
PTF Operator
Function definitions:
Input definition
input alias: ptf_0
output shape: _col1: string, _col2: string, _col5: int
type: WINDOWING
Windowing table definition
input alias: ptf_1
name: windowingtablefunction
order by: _col1 ASC NULLS LAST
partition by: _col2
raw input shape:
window functions:
window function definition
alias: rank_window_0
arguments: _col1
name: rank
window function: GenericUDAFRankEvaluator
window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
alias: dense_rank_window_1
arguments: _col1
name: dense_rank
window function: GenericUDAFDenseRankEvaluator
window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
alias: sum_window_2
arguments: _col5
name: sum
window function: GenericUDAFSumLong
window frame: RANGE PRECEDING(MAX)~CURRENT
PTF Vectorization:
allEvaluatorsAreStreaming: false
className: VectorPTFOperator
evaluatorClasses: [VectorPTFEvaluatorRank, VectorPTFEvaluatorDenseRank, VectorPTFEvaluatorLongSum]
functionInputExpressions: [col 1:string, col 1:string, col 2:int]
functionNames: [rank, dense_rank, sum]
keyInputColumns: [1, 0]
native: true
nonKeyInputColumns: [2]
orderExpressions: [col 1:string]
outputColumns: [3, 4, 5, 1, 0, 2]
outputTypes: [int, int, bigint, string, string, int]
partitionExpressions: [col 0:string]
streamingColumns: [3, 4]
Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint), sum_window_2 (type: bigint)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
Select Vectorization:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [0, 1, 3, 4, 2, 5, 5]
Statistics: Num rows: 26 Data size: 6422 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
Statistics: Num rows: 26 Data size: 6422 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
Fetch Operator
limit: -1
Processor Tree:
ListSink
PREHOOK: query: select p_mfgr, p_name,
rank() over (partition by p_mfgr order by p_name) as r,
dense_rank() over (partition by p_mfgr order by p_name) as dr,
p_size,
sum(p_size) over (partition by p_mfgr order by p_name range between unbounded preceding and current row) as s1,
sum(p_size) over (partition by p_mfgr order by p_name range between unbounded preceding and current row) as s2
from noopwithmap(on
noop(on
noop(on part_orc
partition by p_mfgr, p_name
order by p_mfgr, p_name)
))
PREHOOK: type: QUERY
PREHOOK: Input: default@part_orc
#### A masked pattern was here ####
POSTHOOK: query: select p_mfgr, p_name,
rank() over (partition by p_mfgr order by p_name) as r,
dense_rank() over (partition by p_mfgr order by p_name) as dr,
p_size,
sum(p_size) over (partition by p_mfgr order by p_name range between unbounded preceding and current row) as s1,
sum(p_size) over (partition by p_mfgr order by p_name range between unbounded preceding and current row) as s2
from noopwithmap(on
noop(on
noop(on part_orc
partition by p_mfgr, p_name
order by p_mfgr, p_name)
))
POSTHOOK: type: QUERY
POSTHOOK: Input: default@part_orc
#### A masked pattern was here ####
Manufacturer#1 almond antique burnished rose metallic 1 1 2 4 4
Manufacturer#1 almond antique burnished rose metallic 1 1 2 4 4
Manufacturer#1 almond antique chartreuse lavender yellow 3 2 34 38 38
Manufacturer#1 almond antique salmon chartreuse burlywood 4 3 6 44 44
Manufacturer#1 almond aquamarine burnished black steel 5 4 28 72 72
Manufacturer#1 almond aquamarine pink moccasin thistle 6 5 42 114 114
Manufacturer#2 almond antique violet chocolate turquoise 1 1 14 14 14
Manufacturer#2 almond antique violet turquoise frosted 2 2 40 54 54
Manufacturer#2 almond aquamarine midnight light salmon 3 3 2 56 56
Manufacturer#2 almond aquamarine rose maroon antique 4 4 25 81 81
Manufacturer#2 almond aquamarine sandy cyan gainsboro 5 5 18 99 99
Manufacturer#3 almond antique chartreuse khaki white 1 1 17 17 17
Manufacturer#3 almond antique forest lavender goldenrod 2 2 14 31 31
Manufacturer#3 almond antique metallic orange dim 3 3 19 50 50
Manufacturer#3 almond antique misty red olive 4 4 1 51 51
Manufacturer#3 almond antique olive coral navajo 5 5 45 96 96
Manufacturer#4 almond antique gainsboro frosted violet 1 1 10 10 10
Manufacturer#4 almond antique violet mint lemon 2 2 39 49 49
Manufacturer#4 almond aquamarine floral ivory bisque 3 3 27 76 76
Manufacturer#4 almond aquamarine yellow dodger mint 4 4 7 83 83
Manufacturer#4 almond azure aquamarine papaya violet 5 5 12 95 95
Manufacturer#5 almond antique blue firebrick mint 1 1 31 31 31
Manufacturer#5 almond antique medium spring khaki 2 2 6 37 37
Manufacturer#5 almond antique sky peru orange 3 3 2 39 39
Manufacturer#5 almond aquamarine dodger light gainsboro 4 4 46 85 85
Manufacturer#5 almond azure blanched chiffon midnight 5 5 23 108 108