blob: b690de6a7c29e7eed2d534f21bd330d8980ec95e [file] [log] [blame]
PREHOOK: query: drop table over10k_n3
PREHOOK: type: DROPTABLE
POSTHOOK: query: drop table over10k_n3
POSTHOOK: type: DROPTABLE
PREHOOK: query: create table over10k_n3(
t tinyint,
si smallint,
i int,
b bigint,
f float,
d double,
bo boolean,
s string,
ts timestamp,
`dec` decimal(4,2),
bin binary)
row format delimited
fields terminated by '|'
PREHOOK: type: CREATETABLE
PREHOOK: Output: database:default
PREHOOK: Output: default@over10k_n3
POSTHOOK: query: create table over10k_n3(
t tinyint,
si smallint,
i int,
b bigint,
f float,
d double,
bo boolean,
s string,
ts timestamp,
`dec` decimal(4,2),
bin binary)
row format delimited
fields terminated by '|'
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: database:default
POSTHOOK: Output: default@over10k_n3
PREHOOK: query: load data local inpath '../../data/files/over10k' into table over10k_n3
PREHOOK: type: LOAD
#### A masked pattern was here ####
PREHOOK: Output: default@over10k_n3
POSTHOOK: query: load data local inpath '../../data/files/over10k' into table over10k_n3
POSTHOOK: type: LOAD
#### A masked pattern was here ####
POSTHOOK: Output: default@over10k_n3
PREHOOK: query: explain vectorization detail
select p_mfgr, p_retailprice, p_size,
round(sum(p_retailprice) over w1 , 2) = round(sum(lag(p_retailprice,1,0.0)) over w1 + last_value(p_retailprice) over w1 , 2),
max(p_retailprice) over w1 - min(p_retailprice) over w1 = last_value(p_retailprice) over w1 - first_value(p_retailprice) over w1
from part
window w1 as (distribute by p_mfgr sort by p_retailprice)
PREHOOK: type: QUERY
PREHOOK: Input: default@part
#### A masked pattern was here ####
POSTHOOK: query: explain vectorization detail
select p_mfgr, p_retailprice, p_size,
round(sum(p_retailprice) over w1 , 2) = round(sum(lag(p_retailprice,1,0.0)) over w1 + last_value(p_retailprice) over w1 , 2),
max(p_retailprice) over w1 - min(p_retailprice) over w1 = last_value(p_retailprice) over w1 - first_value(p_retailprice) over w1
from part
window w1 as (distribute by p_mfgr sort by p_retailprice)
POSTHOOK: type: QUERY
POSTHOOK: Input: default@part
#### A masked pattern was here ####
Explain
PLAN VECTORIZATION:
enabled: true
enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
STAGE PLANS:
Stage: Stage-1
Tez
#### A masked pattern was here ####
Edges:
Reducer 2 <- Map 1 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
TableScan
alias: part
Statistics: Num rows: 26 Data size: 2860 Basic stats: COMPLETE Column stats: COMPLETE
TableScan Vectorization:
native: true
vectorizationSchemaColumns: [0:p_partkey:int, 1:p_name:string, 2:p_mfgr:string, 3:p_brand:string, 4:p_type:string, 5:p_size:int, 6:p_container:string, 7:p_retailprice:double, 8:p_comment:string, 9:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
Reduce Output Operator
key expressions: p_mfgr (type: string), p_retailprice (type: double)
null sort order: az
sort order: ++
Map-reduce partition columns: p_mfgr (type: string)
Reduce Sink Vectorization:
className: VectorReduceSinkObjectHashOperator
keyColumns: 2:string, 7:double
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
partitionColumns: 2:string
valueColumns: 5:int
Statistics: Num rows: 26 Data size: 2860 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: p_size (type: int)
Execution mode: vectorized, llap
LLAP IO: all inputs
Map Vectorization:
enabled: true
enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
inputFormatFeatureSupport: [DECIMAL_64]
featureSupportInUse: [DECIMAL_64]
inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
allNative: true
usesVectorUDFAdaptor: false
vectorized: true
rowBatchContext:
dataColumnCount: 9
includeColumns: [2, 5, 7]
dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string
partitionColumnCount: 0
scratchColumnTypeNames: []
Reducer 2
Execution mode: llap
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
notVectorizedReason: PTF operator: lead and lag function not supported in argument expression of aggregation function sum
vectorized: false
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey0 (type: string), VALUE._col4 (type: int), KEY.reducesinkkey1 (type: double)
outputColumnNames: _col2, _col5, _col7
Statistics: Num rows: 26 Data size: 9828 Basic stats: COMPLETE Column stats: COMPLETE
PTF Operator
Function definitions:
Input definition
input alias: ptf_0
output shape: _col2: string, _col5: int, _col7: double
type: WINDOWING
Windowing table definition
input alias: ptf_1
name: windowingtablefunction
order by: _col7 ASC NULLS LAST
partition by: _col2
raw input shape:
window functions:
window function definition
alias: sum_window_0
arguments: _col7
name: sum
window function: GenericUDAFSumDouble
window frame: RANGE PRECEDING(MAX)~CURRENT
window function definition
alias: sum_window_1
arguments: lag(...)
name: sum
window function: GenericUDAFSumDouble
window frame: RANGE PRECEDING(MAX)~CURRENT
window function definition
alias: last_value_window_2
arguments: _col7
name: last_value
window function: GenericUDAFLastValueEvaluator
window frame: RANGE PRECEDING(MAX)~CURRENT
window function definition
alias: max_window_3
arguments: _col7
name: max
window function: GenericUDAFMaxEvaluator
window frame: RANGE PRECEDING(MAX)~CURRENT
window function definition
alias: min_window_4
arguments: _col7
name: min
window function: GenericUDAFMinEvaluator
window frame: RANGE PRECEDING(MAX)~CURRENT
window function definition
alias: first_value_window_5
arguments: _col7
name: first_value
window function: GenericUDAFFirstValueEvaluator
window frame: RANGE PRECEDING(MAX)~CURRENT
Lead/Lag information: lag(...) (type: double)
Statistics: Num rows: 26 Data size: 9828 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col2 (type: string), _col7 (type: double), _col5 (type: int), (round(sum_window_0, 2) = round((sum_window_1 + last_value_window_2), 2)) (type: boolean), ((max_window_3 - min_window_4) = (last_value_window_2 - first_value_window_5)) (type: boolean)
outputColumnNames: _col0, _col1, _col2, _col3, _col4
Statistics: Num rows: 26 Data size: 3068 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
Statistics: Num rows: 26 Data size: 3068 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
Fetch Operator
limit: -1
Processor Tree:
ListSink
PREHOOK: query: select p_mfgr, p_retailprice, p_size,
round(sum(p_retailprice) over w1 , 2) = round(sum(lag(p_retailprice,1,0.0)) over w1 + last_value(p_retailprice) over w1 , 2),
max(p_retailprice) over w1 - min(p_retailprice) over w1 = last_value(p_retailprice) over w1 - first_value(p_retailprice) over w1
from part
window w1 as (distribute by p_mfgr sort by p_retailprice)
PREHOOK: type: QUERY
PREHOOK: Input: default@part
#### A masked pattern was here ####
POSTHOOK: query: select p_mfgr, p_retailprice, p_size,
round(sum(p_retailprice) over w1 , 2) = round(sum(lag(p_retailprice,1,0.0)) over w1 + last_value(p_retailprice) over w1 , 2),
max(p_retailprice) over w1 - min(p_retailprice) over w1 = last_value(p_retailprice) over w1 - first_value(p_retailprice) over w1
from part
window w1 as (distribute by p_mfgr sort by p_retailprice)
POSTHOOK: type: QUERY
POSTHOOK: Input: default@part
#### A masked pattern was here ####
p_mfgr p_retailprice p_size _c3 _c4
Manufacturer#1 1173.15 2 true true
Manufacturer#1 1173.15 2 true true
Manufacturer#1 1414.42 28 true true
Manufacturer#1 1602.59 6 true true
Manufacturer#1 1632.66 42 true true
Manufacturer#1 1753.76 34 true true
Manufacturer#2 1690.68 14 true true
Manufacturer#2 1698.66 25 true true
Manufacturer#2 1701.6 18 true true
Manufacturer#2 1800.7 40 true true
Manufacturer#2 2031.98 2 true true
Manufacturer#3 1190.27 14 true true
Manufacturer#3 1337.29 45 true true
Manufacturer#3 1410.39 19 true true
Manufacturer#3 1671.68 17 true true
Manufacturer#3 1922.98 1 true true
Manufacturer#4 1206.26 27 true true
Manufacturer#4 1290.35 12 true true
Manufacturer#4 1375.42 39 true true
Manufacturer#4 1620.67 10 true true
Manufacturer#4 1844.92 7 true true
Manufacturer#5 1018.1 46 true true
Manufacturer#5 1464.48 23 true true
Manufacturer#5 1611.66 6 true true
Manufacturer#5 1788.73 2 true true
Manufacturer#5 1789.69 31 true true
PREHOOK: query: explain vectorization detail
select p_mfgr, p_retailprice, p_size,
rank() over (distribute by p_mfgr sort by p_retailprice) as r,
sum(p_retailprice) over (distribute by p_mfgr sort by p_retailprice rows between unbounded preceding and current row) as s2,
sum(p_retailprice) over (distribute by p_mfgr sort by p_retailprice rows between unbounded preceding and current row) -5 as s1
from part
PREHOOK: type: QUERY
PREHOOK: Input: default@part
#### A masked pattern was here ####
POSTHOOK: query: explain vectorization detail
select p_mfgr, p_retailprice, p_size,
rank() over (distribute by p_mfgr sort by p_retailprice) as r,
sum(p_retailprice) over (distribute by p_mfgr sort by p_retailprice rows between unbounded preceding and current row) as s2,
sum(p_retailprice) over (distribute by p_mfgr sort by p_retailprice rows between unbounded preceding and current row) -5 as s1
from part
POSTHOOK: type: QUERY
POSTHOOK: Input: default@part
#### A masked pattern was here ####
Explain
PLAN VECTORIZATION:
enabled: true
enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
STAGE PLANS:
Stage: Stage-1
Tez
#### A masked pattern was here ####
Edges:
Reducer 2 <- Map 1 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
TableScan
alias: part
Statistics: Num rows: 26 Data size: 2860 Basic stats: COMPLETE Column stats: COMPLETE
TableScan Vectorization:
native: true
vectorizationSchemaColumns: [0:p_partkey:int, 1:p_name:string, 2:p_mfgr:string, 3:p_brand:string, 4:p_type:string, 5:p_size:int, 6:p_container:string, 7:p_retailprice:double, 8:p_comment:string, 9:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
Reduce Output Operator
key expressions: p_mfgr (type: string), p_retailprice (type: double)
null sort order: az
sort order: ++
Map-reduce partition columns: p_mfgr (type: string)
Reduce Sink Vectorization:
className: VectorReduceSinkObjectHashOperator
keyColumns: 2:string, 7:double
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
partitionColumns: 2:string
valueColumns: 5:int
Statistics: Num rows: 26 Data size: 2860 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: p_size (type: int)
Execution mode: vectorized, llap
LLAP IO: all inputs
Map Vectorization:
enabled: true
enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
inputFormatFeatureSupport: [DECIMAL_64]
featureSupportInUse: [DECIMAL_64]
inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
allNative: true
usesVectorUDFAdaptor: false
vectorized: true
rowBatchContext:
dataColumnCount: 9
includeColumns: [2, 5, 7]
dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string
partitionColumnCount: 0
scratchColumnTypeNames: []
Reducer 2
Execution mode: vectorized, llap
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
reduceColumnNullOrder: az
reduceColumnSortOrder: ++
allNative: false
usesVectorUDFAdaptor: false
vectorized: true
rowBatchContext:
dataColumnCount: 3
dataColumns: KEY.reducesinkkey0:string, KEY.reducesinkkey1:double, VALUE._col4:int
partitionColumnCount: 0
scratchColumnTypeNames: [bigint, double, double]
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey0 (type: string), VALUE._col4 (type: int), KEY.reducesinkkey1 (type: double)
outputColumnNames: _col2, _col5, _col7
Select Vectorization:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [0, 2, 1]
Statistics: Num rows: 26 Data size: 9828 Basic stats: COMPLETE Column stats: COMPLETE
PTF Operator
Function definitions:
Input definition
input alias: ptf_0
output shape: _col2: string, _col5: int, _col7: double
type: WINDOWING
Windowing table definition
input alias: ptf_1
name: windowingtablefunction
order by: _col7 ASC NULLS LAST
partition by: _col2
raw input shape:
window functions:
window function definition
alias: rank_window_0
arguments: _col7
name: rank
window function: GenericUDAFRankEvaluator
window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
window function definition
alias: sum_window_1
arguments: _col7
name: sum
window function: GenericUDAFSumDouble
window frame: ROWS PRECEDING(MAX)~CURRENT
PTF Vectorization:
className: VectorPTFOperator
evaluatorClasses: [VectorPTFEvaluatorRank, VectorPTFEvaluatorStreamingDoubleSum]
functionInputExpressions: [col 1:double, col 1:double]
functionNames: [rank, sum]
keyInputColumns: [0, 1]
native: true
nonKeyInputColumns: [2]
orderExpressions: [col 1:double]
outputColumns: [3, 4, 0, 2, 1]
outputTypes: [int, double, string, int, double]
partitionExpressions: [col 0:string]
streamingColumns: [3, 4]
Statistics: Num rows: 26 Data size: 9828 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col2 (type: string), _col7 (type: double), _col5 (type: int), rank_window_0 (type: int), sum_window_1 (type: double), (sum_window_1 - 5.0D) (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
Select Vectorization:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [0, 1, 2, 3, 4, 5]
selectExpressions: DoubleColSubtractDoubleScalar(col 4:double, val 5.0) -> 5:double
Statistics: Num rows: 26 Data size: 3380 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
Statistics: Num rows: 26 Data size: 3380 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
Fetch Operator
limit: -1
Processor Tree:
ListSink
PREHOOK: query: select p_mfgr, p_retailprice, p_size,
rank() over (distribute by p_mfgr sort by p_retailprice) as r,
sum(p_retailprice) over (distribute by p_mfgr sort by p_retailprice rows between unbounded preceding and current row) as s2,
sum(p_retailprice) over (distribute by p_mfgr sort by p_retailprice rows between unbounded preceding and current row) -5 as s1
from part
PREHOOK: type: QUERY
PREHOOK: Input: default@part
#### A masked pattern was here ####
POSTHOOK: query: select p_mfgr, p_retailprice, p_size,
rank() over (distribute by p_mfgr sort by p_retailprice) as r,
sum(p_retailprice) over (distribute by p_mfgr sort by p_retailprice rows between unbounded preceding and current row) as s2,
sum(p_retailprice) over (distribute by p_mfgr sort by p_retailprice rows between unbounded preceding and current row) -5 as s1
from part
POSTHOOK: type: QUERY
POSTHOOK: Input: default@part
#### A masked pattern was here ####
p_mfgr p_retailprice p_size r s2 s1
Manufacturer#1 1173.15 2 1 1173.15 1168.15
Manufacturer#1 1173.15 2 1 2346.3 2341.3
Manufacturer#1 1414.42 28 3 3760.7200000000003 3755.7200000000003
Manufacturer#1 1602.59 6 4 5363.31 5358.31
Manufacturer#1 1632.66 42 5 6995.97 6990.97
Manufacturer#1 1753.76 34 6 8749.73 8744.73
Manufacturer#2 1690.68 14 1 1690.68 1685.68
Manufacturer#2 1698.66 25 2 3389.34 3384.34
Manufacturer#2 1701.6 18 3 5090.9400000000005 5085.9400000000005
Manufacturer#2 1800.7 40 4 6891.64 6886.64
Manufacturer#2 2031.98 2 5 8923.62 8918.62
Manufacturer#3 1190.27 14 1 1190.27 1185.27
Manufacturer#3 1337.29 45 2 2527.56 2522.56
Manufacturer#3 1410.39 19 3 3937.95 3932.95
Manufacturer#3 1671.68 17 4 5609.63 5604.63
Manufacturer#3 1922.98 1 5 7532.610000000001 7527.610000000001
Manufacturer#4 1206.26 27 1 1206.26 1201.26
Manufacturer#4 1290.35 12 2 2496.6099999999997 2491.6099999999997
Manufacturer#4 1375.42 39 3 3872.0299999999997 3867.0299999999997
Manufacturer#4 1620.67 10 4 5492.7 5487.7
Manufacturer#4 1844.92 7 5 7337.62 7332.62
Manufacturer#5 1018.1 46 1 1018.1 1013.1
Manufacturer#5 1464.48 23 2 2482.58 2477.58
Manufacturer#5 1611.66 6 3 4094.24 4089.24
Manufacturer#5 1788.73 2 4 5882.969999999999 5877.969999999999
Manufacturer#5 1789.69 31 5 7672.66 7667.66
PREHOOK: query: explain vectorization detail
select s, si, f, si - lead(f, 3) over (partition by t order by bo,s,si,f desc) from over10k_n3 limit 100
PREHOOK: type: QUERY
PREHOOK: Input: default@over10k_n3
#### A masked pattern was here ####
POSTHOOK: query: explain vectorization detail
select s, si, f, si - lead(f, 3) over (partition by t order by bo,s,si,f desc) from over10k_n3 limit 100
POSTHOOK: type: QUERY
POSTHOOK: Input: default@over10k_n3
#### A masked pattern was here ####
Explain
PLAN VECTORIZATION:
enabled: true
enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
STAGE PLANS:
Stage: Stage-1
Tez
#### A masked pattern was here ####
Edges:
Reducer 2 <- Map 1 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
TableScan
alias: over10k_n3
Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE
TableScan Vectorization:
native: true
vectorizationSchemaColumns: [0:t:tinyint, 1:si:smallint, 2:i:int, 3:b:bigint, 4:f:float, 5:d:double, 6:bo:boolean, 7:s:string, 8:ts:timestamp, 9:dec:decimal(4,2)/DECIMAL_64, 10:bin:binary, 11:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
Reduce Output Operator
key expressions: t (type: tinyint), bo (type: boolean), s (type: string), si (type: smallint), f (type: float)
null sort order: azzza
sort order: ++++-
Map-reduce partition columns: t (type: tinyint)
Reduce Sink Vectorization:
className: VectorReduceSinkObjectHashOperator
keyColumns: 0:tinyint, 6:boolean, 7:string, 1:smallint, 4:float
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
partitionColumns: 0:tinyint
Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE
Execution mode: vectorized, llap
LLAP IO: all inputs
Map Vectorization:
enabled: true
enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
inputFormatFeatureSupport: [DECIMAL_64]
featureSupportInUse: [DECIMAL_64]
inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
allNative: true
usesVectorUDFAdaptor: false
vectorized: true
rowBatchContext:
dataColumnCount: 11
includeColumns: [0, 1, 4, 6, 7]
dataColumns: t:tinyint, si:smallint, i:int, b:bigint, f:float, d:double, bo:boolean, s:string, ts:timestamp, dec:decimal(4,2)/DECIMAL_64, bin:binary
partitionColumnCount: 0
scratchColumnTypeNames: []
Reducer 2
Execution mode: llap
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
notVectorizedReason: PTF operator: lead not in supported functions [avg, count, dense_rank, first_value, last_value, max, min, rank, row_number, sum]
vectorized: false
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey0 (type: tinyint), KEY.reducesinkkey3 (type: smallint), KEY.reducesinkkey4 (type: float), KEY.reducesinkkey1 (type: boolean), KEY.reducesinkkey2 (type: string)
outputColumnNames: _col0, _col1, _col4, _col6, _col7
Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE
PTF Operator
Function definitions:
Input definition
input alias: ptf_0
output shape: _col0: tinyint, _col1: smallint, _col4: float, _col6: boolean, _col7: string
type: WINDOWING
Windowing table definition
input alias: ptf_1
name: windowingtablefunction
order by: _col6 ASC NULLS LAST, _col7 ASC NULLS LAST, _col1 ASC NULLS LAST, _col4 DESC NULLS FIRST
partition by: _col0
raw input shape:
window functions:
window function definition
alias: lead_window_0
arguments: _col4, 3
name: lead
window function: GenericUDAFLeadEvaluator
window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE
Limit
Number of rows: 100
Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col7 (type: string), _col1 (type: smallint), _col4 (type: float), (UDFToFloat(_col1) - lead_window_0) (type: float)
outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
Fetch Operator
limit: 100
Processor Tree:
ListSink
PREHOOK: query: select s, si, f, si - lead(f, 3) over (partition by t order by bo,s,si,f desc) from over10k_n3 limit 100
PREHOOK: type: QUERY
PREHOOK: Input: default@over10k_n3
#### A masked pattern was here ####
POSTHOOK: query: select s, si, f, si - lead(f, 3) over (partition by t order by bo,s,si,f desc) from over10k_n3 limit 100
POSTHOOK: type: QUERY
POSTHOOK: Input: default@over10k_n3
#### A masked pattern was here ####
s si f _c3
alice allen 400 76.31 337.23
alice davidson 384 71.97 357.79
alice king 455 2.48 395.93
alice king 458 62.77 384.16998
alice xylophone 485 26.21 464.05
bob falkner 260 59.07 242.4
bob ichabod 454 73.83 381.7
bob polk 264 20.95 257.17
bob underhill 454 17.6 424.94
bob underhill 465 72.3 453.17
bob van buren 433 6.83 398.4
calvin ichabod 431 29.06 334.22
david garcia 485 11.83 421.51
ethan steinbeck 298 34.6 288.14
fred ellison 376 96.78 330.76
holly steinbeck 384 63.49 293.7
holly underhill 318 9.86 269.91
irene ellison 458 45.24 365.29
irene underhill 307 90.3 244.19
jessica johnson 494 48.09 490.18
jessica king 459 92.71 452.2
jessica white 284 62.81 209.08
luke garcia 311 3.82 267.27
luke young 451 6.8 429.0
mike king 275 74.92 211.81
oscar garcia 362 43.73 340.66
priscilla laertes 316 22.0 296.06
priscilla quirinius 423 63.19 362.72
priscilla zipper 485 21.34 400.61
quinn ellison 266 19.94 209.95
quinn polk 507 60.28 447.66
sarah robinson 320 84.39 309.74
tom polk 346 56.05 320.33
ulysses ellison 381 59.34 358.66
ulysses quirinius 303 10.26 259.6
ulysses robinson 313 25.67 269.31
ulysses steinbeck 333 22.34 270.61
victor allen 337 43.4 311.5
victor hernandez 447 43.69 375.22
victor xylophone 438 62.39 424.33
wendy quirinius 279 25.5 250.25
wendy robinson 275 71.78 262.88
wendy xylophone 314 13.67 295.73
xavier garcia 493 28.75 474.56
zach thompson 386 12.12 377.63
zach young 286 18.27 263.65
alice falkner 280 18.44 227.7
bob ellison 339 8.37 300.95
bob johnson 374 22.35 326.49
calvin white 280 52.3 198.32
david carson 270 38.05 255.77
david falkner 469 47.51 388.35
david hernandez 408 81.68 339.27
ethan underhill 339 14.23 256.26
gabriella brown 498 80.65 413.25
holly nixon 505 68.73 440.71
holly polk 268 82.74 182.04001
holly thompson 387 84.75 298.22
irene young 458 64.29 401.8
jessica miller 299 85.96 243.41
katie ichabod 469 88.78 385.61
luke ichabod 289 56.2 286.74
luke king 337 55.59 274.88
mike allen 465 83.39 383.03
mike polk 500 2.26 427.74
mike white 454 62.12 430.78
mike xylophone 448 81.97 447.17
nick nixon 335 72.26 240.78
nick robinson 350 23.22 294.59
oscar davidson 432 0.83 420.93
oscar johnson 315 94.22 233.05
oscar johnson 469 55.41 468.44
oscar miller 324 11.07 265.19
rachel davidson 507 81.95 468.78
rachel thompson 344 0.56 246.12
sarah miller 386 58.81 304.36
sarah xylophone 275 38.22 177.48999
sarah zipper 376 97.88 294.61
tom hernandez 467 81.64 459.9
tom hernandez 477 97.51 415.19
tom steinbeck 414 81.39 361.87
ulysses carson 343 7.1 314.22
victor robinson 415 61.81 349.5
victor thompson 344 52.13 NULL
xavier ovid 280 28.78 NULL
yuri xylophone 430 65.5 NULL
alice underhill 389 26.68 368.06
alice underhill 446 6.49 444.21
bob ovid 331 67.12 236.43
bob van buren 406 20.94 383.32
david falkner 406 1.79 374.34
david miller 450 94.57 380.13
ethan allen 380 22.68 375.6
ethan king 395 31.66 361.51
ethan nixon 475 69.87 431.39
ethan polk 283 4.4 243.82
fred allen 331 33.49 281.68
fred king 511 43.61 457.22
fred polk 261 39.18 248.73
fred young 303 49.32 221.51001
PREHOOK: query: explain vectorization detail
select s, i, i - lead(i, 3, 0) over (partition by si order by i,s) from over10k_n3 limit 100
PREHOOK: type: QUERY
PREHOOK: Input: default@over10k_n3
#### A masked pattern was here ####
POSTHOOK: query: explain vectorization detail
select s, i, i - lead(i, 3, 0) over (partition by si order by i,s) from over10k_n3 limit 100
POSTHOOK: type: QUERY
POSTHOOK: Input: default@over10k_n3
#### A masked pattern was here ####
Explain
PLAN VECTORIZATION:
enabled: true
enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
STAGE PLANS:
Stage: Stage-1
Tez
#### A masked pattern was here ####
Edges:
Reducer 2 <- Map 1 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
TableScan
alias: over10k_n3
Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE
TableScan Vectorization:
native: true
vectorizationSchemaColumns: [0:t:tinyint, 1:si:smallint, 2:i:int, 3:b:bigint, 4:f:float, 5:d:double, 6:bo:boolean, 7:s:string, 8:ts:timestamp, 9:dec:decimal(4,2)/DECIMAL_64, 10:bin:binary, 11:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
Reduce Output Operator
key expressions: si (type: smallint), i (type: int), s (type: string)
null sort order: azz
sort order: +++
Map-reduce partition columns: si (type: smallint)
Reduce Sink Vectorization:
className: VectorReduceSinkObjectHashOperator
keyColumns: 1:smallint, 2:int, 7:string
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
partitionColumns: 1:smallint
Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE
Execution mode: vectorized, llap
LLAP IO: all inputs
Map Vectorization:
enabled: true
enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
inputFormatFeatureSupport: [DECIMAL_64]
featureSupportInUse: [DECIMAL_64]
inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
allNative: true
usesVectorUDFAdaptor: false
vectorized: true
rowBatchContext:
dataColumnCount: 11
includeColumns: [1, 2, 7]
dataColumns: t:tinyint, si:smallint, i:int, b:bigint, f:float, d:double, bo:boolean, s:string, ts:timestamp, dec:decimal(4,2)/DECIMAL_64, bin:binary
partitionColumnCount: 0
scratchColumnTypeNames: []
Reducer 2
Execution mode: llap
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
notVectorizedReason: PTF operator: lead not in supported functions [avg, count, dense_rank, first_value, last_value, max, min, rank, row_number, sum]
vectorized: false
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int), KEY.reducesinkkey2 (type: string)
outputColumnNames: _col1, _col2, _col7
Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE
PTF Operator
Function definitions:
Input definition
input alias: ptf_0
output shape: _col1: smallint, _col2: int, _col7: string
type: WINDOWING
Windowing table definition
input alias: ptf_1
name: windowingtablefunction
order by: _col2 ASC NULLS LAST, _col7 ASC NULLS LAST
partition by: _col1
raw input shape:
window functions:
window function definition
alias: lead_window_0
arguments: _col2, 3, 0
name: lead
window function: GenericUDAFLeadEvaluator
window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE
Limit
Number of rows: 100
Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col7 (type: string), _col2 (type: int), (_col2 - lead_window_0) (type: int)
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
Fetch Operator
limit: 100
Processor Tree:
ListSink
PREHOOK: query: select s, i, i - lead(i, 3, 0) over (partition by si order by i,s) from over10k_n3 limit 100
PREHOOK: type: QUERY
PREHOOK: Input: default@over10k_n3
#### A masked pattern was here ####
POSTHOOK: query: select s, i, i - lead(i, 3, 0) over (partition by si order by i,s) from over10k_n3 limit 100
POSTHOOK: type: QUERY
POSTHOOK: Input: default@over10k_n3
#### A masked pattern was here ####
s i _c2
wendy garcia 65540 -18
ethan thompson 65543 -20
zach nixon 65549 -31
alice robinson 65558 -28
wendy nixon 65563 -33
victor robinson 65580 -19
ethan falkner 65586 -18
victor davidson 65596 -17
xavier quirinius 65599 -14
fred quirinius 65604 -11
nick zipper 65613 -3
xavier van buren 65613 -7
victor johnson 65615 -12
alice ovid 65616 -24
xavier ovid 65620 -23
ulysses white 65627 -24
sarah white 65640 -13
calvin young 65643 -25
victor thompson 65651 -42
calvin johnson 65653 -53
irene polk 65668 -45
zach underhill 65693 -38
quinn hernandez 65706 -27
rachel ovid 65713 -24
gabriella falkner 65731 -7
zach white 65733 -8
fred hernandez 65737 -7
rachel ellison 65738 -6
oscar steinbeck 65741 -6
alice ellison 65744 -8
tom allen 65744 -19
quinn quirinius 65747 -31
victor hernandez 65752 -26
holly xylophone 65763 -26
david davidson 65778 65778
ulysses young 65778 65778
sarah brown 65789 65789
xavier brown 65541 -16
zach hernandez 65542 -18
katie ichabod 65547 -19
oscar young 65557 -15
holly white 65560 -14
priscilla laertes 65566 -9
ethan king 65572 -6
zach hernandez 65574 -10
oscar thompson 65575 -13
victor xylophone 65578 -16
gabriella ellison 65584 -26
nick quirinius 65588 -22
holly robinson 65594 -18
alice xylophone 65610 -16
yuri brown 65610 -21
sarah hernandez 65612 -26
katie garcia 65626 -28
jessica laertes 65631 -23
ethan underhill 65638 -17
irene young 65654 -37
priscilla thompson 65654 -40
luke quirinius 65655 -44
david brown 65691 -20
luke falkner 65694 -18
priscilla miller 65699 -20
rachel robinson 65711 -9
ethan polk 65712 -10
wendy brown 65719 -13
mike underhill 65720 -18
zach underhill 65722 -26
nick zipper 65732 -20
fred brown 65738 -18
ulysses young 65748 -23
nick davidson 65752 -19
fred zipper 65756 -15
yuri nixon 65771 -10
zach hernandez 65771 -19
zach zipper 65771 65771
alice underhill 65781 65781
oscar laertes 65790 65790
sarah zipper 65546 -19
bob falkner 65551 -17
luke ovid 65551 -17
katie allen 65565 -4
nick falkner 65568 -5
zach steinbeck 65568 -11
oscar van buren 65569 -13
gabriella young 65573 -11
jessica ichabod 65579 -24
david garcia 65582 -24
nick xylophone 65584 -27
calvin johnson 65603 -14
xavier zipper 65606 -50
alice nixon 65611 -58
jessica laertes 65617 -62
fred king 65656 -61
priscilla underhill 65669 -48
priscilla zipper 65679 -45
nick king 65717 -11
sarah polk 65717 -17
irene quirinius 65724 -28
tom laertes 65728 -25
yuri johnson 65734 -27
PREHOOK: query: explain vectorization detail
select s, si, d, si - lag(d, 3) over (partition by b order by si,s,d) from over10k_n3 limit 100
PREHOOK: type: QUERY
PREHOOK: Input: default@over10k_n3
#### A masked pattern was here ####
POSTHOOK: query: explain vectorization detail
select s, si, d, si - lag(d, 3) over (partition by b order by si,s,d) from over10k_n3 limit 100
POSTHOOK: type: QUERY
POSTHOOK: Input: default@over10k_n3
#### A masked pattern was here ####
Explain
PLAN VECTORIZATION:
enabled: true
enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
STAGE PLANS:
Stage: Stage-1
Tez
#### A masked pattern was here ####
Edges:
Reducer 2 <- Map 1 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
TableScan
alias: over10k_n3
Statistics: Num rows: 1 Data size: 204 Basic stats: COMPLETE Column stats: NONE
TableScan Vectorization:
native: true
vectorizationSchemaColumns: [0:t:tinyint, 1:si:smallint, 2:i:int, 3:b:bigint, 4:f:float, 5:d:double, 6:bo:boolean, 7:s:string, 8:ts:timestamp, 9:dec:decimal(4,2)/DECIMAL_64, 10:bin:binary, 11:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
Reduce Output Operator
key expressions: b (type: bigint), si (type: smallint), s (type: string), d (type: double)
null sort order: azzz
sort order: ++++
Map-reduce partition columns: b (type: bigint)
Reduce Sink Vectorization:
className: VectorReduceSinkObjectHashOperator
keyColumns: 3:bigint, 1:smallint, 7:string, 5:double
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
partitionColumns: 3:bigint
Statistics: Num rows: 1 Data size: 204 Basic stats: COMPLETE Column stats: NONE
Execution mode: vectorized, llap
LLAP IO: all inputs
Map Vectorization:
enabled: true
enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
inputFormatFeatureSupport: [DECIMAL_64]
featureSupportInUse: [DECIMAL_64]
inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
allNative: true
usesVectorUDFAdaptor: false
vectorized: true
rowBatchContext:
dataColumnCount: 11
includeColumns: [1, 3, 5, 7]
dataColumns: t:tinyint, si:smallint, i:int, b:bigint, f:float, d:double, bo:boolean, s:string, ts:timestamp, dec:decimal(4,2)/DECIMAL_64, bin:binary
partitionColumnCount: 0
scratchColumnTypeNames: []
Reducer 2
Execution mode: llap
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
notVectorizedReason: PTF operator: lag not in supported functions [avg, count, dense_rank, first_value, last_value, max, min, rank, row_number, sum]
vectorized: false
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey1 (type: smallint), KEY.reducesinkkey0 (type: bigint), KEY.reducesinkkey3 (type: double), KEY.reducesinkkey2 (type: string)
outputColumnNames: _col1, _col3, _col5, _col7
Statistics: Num rows: 1 Data size: 204 Basic stats: COMPLETE Column stats: NONE
PTF Operator
Function definitions:
Input definition
input alias: ptf_0
output shape: _col1: smallint, _col3: bigint, _col5: double, _col7: string
type: WINDOWING
Windowing table definition
input alias: ptf_1
name: windowingtablefunction
order by: _col1 ASC NULLS LAST, _col7 ASC NULLS LAST, _col5 ASC NULLS LAST
partition by: _col3
raw input shape:
window functions:
window function definition
alias: lag_window_0
arguments: _col5, 3
name: lag
window function: GenericUDAFLagEvaluator
window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
Statistics: Num rows: 1 Data size: 204 Basic stats: COMPLETE Column stats: NONE
Limit
Number of rows: 100
Statistics: Num rows: 1 Data size: 204 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col7 (type: string), _col1 (type: smallint), _col5 (type: double), (UDFToDouble(_col1) - lag_window_0) (type: double)
outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 1 Data size: 204 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
Statistics: Num rows: 1 Data size: 204 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
Fetch Operator
limit: 100
Processor Tree:
ListSink
PREHOOK: query: select s, si, d, si - lag(d, 3) over (partition by b order by si,s,d) from over10k_n3 limit 100
PREHOOK: type: QUERY
PREHOOK: Input: default@over10k_n3
#### A masked pattern was here ####
POSTHOOK: query: select s, si, d, si - lag(d, 3) over (partition by b order by si,s,d) from over10k_n3 limit 100
POSTHOOK: type: QUERY
POSTHOOK: Input: default@over10k_n3
#### A masked pattern was here ####
s si d _c3
jessica ellison 262 30.41 NULL
david young 266 45.12 NULL
jessica steinbeck 274 2.15 NULL
david zipper 275 43.45 244.59
zach nixon 283 15.95 237.88
holly allen 285 24.37 282.85
irene garcia 292 33.54 248.55
ulysses xylophone 292 44.66 276.05
irene van buren 309 35.81 284.63
sarah miller 312 6.65 278.46
victor garcia 312 39.14 267.34000000000003
ethan ichabod 319 29.4 283.19
wendy falkner 322 10.02 315.35
oscar miller 324 25.95 284.86
david ovid 332 28.34 302.6
alice zipper 333 3.38 322.98
yuri nixon 333 8.28 307.05
ulysses nixon 335 18.48 306.66
david ovid 336 9.36 332.62
calvin falkner 337 17.63 328.72
katie quirinius 349 11.3 330.52
quinn miller 351 22.46 341.64
victor xylophone 357 38.58 339.37
ethan garcia 368 9.2 356.7
nick steinbeck 395 37.54 372.54
ulysses ichabod 415 47.61 376.42
rachel thompson 416 37.99 406.8
calvin young 418 47.22 380.46
katie xylophone 425 32.59 377.39
nick quirinius 429 19.63 391.01
ethan ellison 453 47.92 405.78
irene nixon 454 48.03 421.40999999999997
bob steinbeck 462 47.04 442.37
luke robinson 462 47.48 414.08
gabriella steinbeck 467 9.35 418.97
tom hernandez 467 29.36 419.96
irene polk 485 14.26 437.52
mike xylophone 494 36.92 484.65
calvin allen 499 39.99 469.64
quinn steinbeck 503 16.62 488.74
calvin thompson 263 30.87 NULL
rachel quirinius 263 29.46 NULL
ulysses garcia 263 31.85 NULL
mike steinbeck 266 48.57 235.13
rachel young 275 14.75 245.54
tom king 278 31.11 246.15
oscar robinson 283 30.35 234.43
zach allen 284 1.88 269.25
bob king 308 27.61 276.89
ulysses allen 310 22.77 279.65
fred nixon 317 0.48 315.12
gabriella robinson 321 0.33 293.39
bob johnson 325 9.61 302.23
rachel davidson 335 2.34 334.52
fred brown 337 5.8 336.67
wendy ellison 350 20.25 340.39
zach falkner 391 13.67 388.66
katie xylophone 410 39.09 404.2
holly king 413 3.56 392.75
sarah van buren 417 7.81 403.33
calvin van buren 430 36.01 390.90999999999997
katie white 434 33.56 430.44
oscar quirinius 454 7.03 446.19
zach young 505 18.19 468.99
gabriella robinson 506 12.8 472.44
sarah xylophone 507 16.09 499.97
rachel thompson 267 46.87 NULL
gabriella van buren 271 41.04 NULL
mike steinbeck 284 11.44 NULL
ethan ovid 293 2.08 246.13
luke falkner 293 40.67 251.96
irene nixon 321 24.35 309.56
mike van buren 327 2.58 324.92
ulysses robinson 329 26.64 288.33
quinn laertes 332 10.71 307.65
tom polk 346 34.03 343.42
jessica johnson 352 45.71 325.36
xavier davidson 354 33.9 343.29
wendy nixon 364 29.42 329.97
jessica quirinius 375 47.33 329.29
xavier brown 376 26.17 342.1
gabriella davidson 383 18.87 353.58
jessica brown 388 34.09 340.67
gabriella garcia 391 32.44 364.83
ethan miller 396 49.07 377.13
bob garcia 416 7.82 381.90999999999997
priscilla hernandez 416 29.94 383.56
holly nixon 419 17.81 369.93
nick underhill 429 39.54 421.18
xavier falkner 434 0.88 404.06
luke robinson 461 44.02 443.19
bob underhill 465 22.58 425.46
ulysses king 483 37.98 482.12
jessica miller 486 26.14 441.98
bob ovid 493 9.7 470.42
alice falkner 500 37.85 462.02
quinn xylophone 267 49.8 NULL
gabriella thompson 268 17.15 NULL
calvin xylophone 275 49.32 NULL
gabriella zipper 279 30.41 229.2
PREHOOK: query: explain vectorization detail
select s, lag(s, 3, 'fred') over (partition by f order by b) from over10k_n3 limit 100
PREHOOK: type: QUERY
PREHOOK: Input: default@over10k_n3
#### A masked pattern was here ####
POSTHOOK: query: explain vectorization detail
select s, lag(s, 3, 'fred') over (partition by f order by b) from over10k_n3 limit 100
POSTHOOK: type: QUERY
POSTHOOK: Input: default@over10k_n3
#### A masked pattern was here ####
Explain
PLAN VECTORIZATION:
enabled: true
enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
STAGE PLANS:
Stage: Stage-1
Tez
#### A masked pattern was here ####
Edges:
Reducer 2 <- Map 1 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
TableScan
alias: over10k_n3
Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: NONE
TableScan Vectorization:
native: true
vectorizationSchemaColumns: [0:t:tinyint, 1:si:smallint, 2:i:int, 3:b:bigint, 4:f:float, 5:d:double, 6:bo:boolean, 7:s:string, 8:ts:timestamp, 9:dec:decimal(4,2)/DECIMAL_64, 10:bin:binary, 11:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
Reduce Output Operator
key expressions: f (type: float), b (type: bigint)
null sort order: az
sort order: ++
Map-reduce partition columns: f (type: float)
Reduce Sink Vectorization:
className: VectorReduceSinkObjectHashOperator
keyColumns: 4:float, 3:bigint
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
partitionColumns: 4:float
valueColumns: 7:string
Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: NONE
value expressions: s (type: string)
Execution mode: vectorized, llap
LLAP IO: all inputs
Map Vectorization:
enabled: true
enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
inputFormatFeatureSupport: [DECIMAL_64]
featureSupportInUse: [DECIMAL_64]
inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
allNative: true
usesVectorUDFAdaptor: false
vectorized: true
rowBatchContext:
dataColumnCount: 11
includeColumns: [3, 4, 7]
dataColumns: t:tinyint, si:smallint, i:int, b:bigint, f:float, d:double, bo:boolean, s:string, ts:timestamp, dec:decimal(4,2)/DECIMAL_64, bin:binary
partitionColumnCount: 0
scratchColumnTypeNames: []
Reducer 2
Execution mode: llap
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
notVectorizedReason: PTF operator: lag not in supported functions [avg, count, dense_rank, first_value, last_value, max, min, rank, row_number, sum]
vectorized: false
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey1 (type: bigint), KEY.reducesinkkey0 (type: float), VALUE._col5 (type: string)
outputColumnNames: _col3, _col4, _col7
Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: NONE
PTF Operator
Function definitions:
Input definition
input alias: ptf_0
output shape: _col3: bigint, _col4: float, _col7: string
type: WINDOWING
Windowing table definition
input alias: ptf_1
name: windowingtablefunction
order by: _col3 ASC NULLS LAST
partition by: _col4
raw input shape:
window functions:
window function definition
alias: lag_window_0
arguments: _col7, 3, 'fred'
name: lag
window function: GenericUDAFLagEvaluator
window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: NONE
Limit
Number of rows: 100
Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col7 (type: string), lag_window_0 (type: string)
outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
Fetch Operator
limit: 100
Processor Tree:
ListSink
PREHOOK: query: select s, lag(s, 3, 'fred') over (partition by f order by b) from over10k_n3 limit 100
PREHOOK: type: QUERY
PREHOOK: Input: default@over10k_n3
#### A masked pattern was here ####
POSTHOOK: query: select s, lag(s, 3, 'fred') over (partition by f order by b) from over10k_n3 limit 100
POSTHOOK: type: QUERY
POSTHOOK: Input: default@over10k_n3
#### A masked pattern was here ####
s lag_window_0
yuri thompson fred
bob ichabod fred
luke king fred
luke steinbeck fred
fred zipper fred
quinn miller fred
calvin van buren fred
holly steinbeck fred
david davidson fred
calvin thompson fred
calvin quirinius fred
david ovid fred
holly thompson fred
nick zipper fred
victor steinbeck fred
victor robinson fred
zach ovid fred
ulysses zipper fred
luke falkner fred
irene thompson fred
yuri johnson fred
ulysses falkner fred
gabriella robinson fred
alice robinson fred
priscilla xylophone fred
david laertes fred
mike underhill fred
victor van buren fred
holly falkner fred
priscilla falkner fred
ethan ovid fred
luke zipper fred
mike steinbeck fred
calvin white fred
alice quirinius fred
irene miller fred
wendy polk fred
nick young fred
yuri davidson fred
ethan ellison fred
zach hernandez fred
wendy miller fred
katie underhill fred
irene zipper fred
holly allen fred
quinn brown fred
calvin ovid fred
zach robinson fred
nick miller fred
mike allen fred
yuri van buren fred
priscilla young fred
zach miller fred
victor xylophone fred
sarah falkner fred
rachel ichabod fred
alice robinson fred
calvin ovid fred
calvin ovid fred
luke laertes fred
david hernandez fred
alice ovid fred
luke quirinius fred
oscar white fred
zach falkner fred
rachel thompson fred
priscilla king fred
xavier polk fred
wendy ichabod fred
rachel ovid fred
wendy allen fred
luke brown fred
mike brown fred
oscar ichabod fred
xavier garcia fred
yuri brown fred
bob xylophone fred
luke davidson fred
ethan quirinius fred
zach davidson fred
irene miller fred
wendy king fred
bob zipper fred
sarah thompson fred
bob carson fred
bob laertes fred
xavier allen fred
sarah robinson fred
david king fred
oscar davidson fred
victor hernandez fred
wendy polk fred
david ellison fred
ulysses johnson fred
jessica ovid fred
bob king fred
ulysses garcia fred
irene falkner fred
holly robinson fred
yuri white fred
PREHOOK: query: explain vectorization detail
select p_mfgr, avg(p_retailprice) over(partition by p_mfgr, p_type order by p_mfgr) from part
PREHOOK: type: QUERY
PREHOOK: Input: default@part
#### A masked pattern was here ####
POSTHOOK: query: explain vectorization detail
select p_mfgr, avg(p_retailprice) over(partition by p_mfgr, p_type order by p_mfgr) from part
POSTHOOK: type: QUERY
POSTHOOK: Input: default@part
#### A masked pattern was here ####
Explain
PLAN VECTORIZATION:
enabled: true
enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
STAGE PLANS:
Stage: Stage-1
Tez
#### A masked pattern was here ####
Edges:
Reducer 2 <- Map 1 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
TableScan
alias: part
Statistics: Num rows: 26 Data size: 5460 Basic stats: COMPLETE Column stats: COMPLETE
TableScan Vectorization:
native: true
vectorizationSchemaColumns: [0:p_partkey:int, 1:p_name:string, 2:p_mfgr:string, 3:p_brand:string, 4:p_type:string, 5:p_size:int, 6:p_container:string, 7:p_retailprice:double, 8:p_comment:string, 9:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
Reduce Output Operator
key expressions: p_mfgr (type: string), p_type (type: string)
null sort order: za
sort order: ++
Map-reduce partition columns: p_mfgr (type: string), p_type (type: string)
Reduce Sink Vectorization:
className: VectorReduceSinkMultiKeyOperator
keyColumns: 2:string, 4:string
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
valueColumns: 7:double
Statistics: Num rows: 26 Data size: 5460 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: p_retailprice (type: double)
Execution mode: vectorized, llap
LLAP IO: all inputs
Map Vectorization:
enabled: true
enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
inputFormatFeatureSupport: [DECIMAL_64]
featureSupportInUse: [DECIMAL_64]
inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
allNative: true
usesVectorUDFAdaptor: false
vectorized: true
rowBatchContext:
dataColumnCount: 9
includeColumns: [2, 4, 7]
dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string
partitionColumnCount: 0
scratchColumnTypeNames: []
Reducer 2
Execution mode: vectorized, llap
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
reduceColumnNullOrder: za
reduceColumnSortOrder: ++
allNative: false
usesVectorUDFAdaptor: false
vectorized: true
rowBatchContext:
dataColumnCount: 3
dataColumns: KEY.reducesinkkey0:string, KEY.reducesinkkey1:string, VALUE._col5:double
partitionColumnCount: 0
scratchColumnTypeNames: [double]
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), VALUE._col5 (type: double)
outputColumnNames: _col2, _col4, _col7
Select Vectorization:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [0, 1, 2]
Statistics: Num rows: 26 Data size: 12428 Basic stats: COMPLETE Column stats: COMPLETE
PTF Operator
Function definitions:
Input definition
input alias: ptf_0
output shape: _col2: string, _col4: string, _col7: double
type: WINDOWING
Windowing table definition
input alias: ptf_1
name: windowingtablefunction
order by: _col2 ASC NULLS LAST
partition by: _col2, _col4
raw input shape:
window functions:
window function definition
alias: avg_window_0
arguments: _col7
name: avg
window function: GenericUDAFAverageEvaluatorDouble
window frame: RANGE PRECEDING(MAX)~CURRENT
PTF Vectorization:
className: VectorPTFOperator
evaluatorClasses: [VectorPTFEvaluatorDoubleAvg]
functionInputExpressions: [col 2:double]
functionNames: [avg]
keyInputColumns: [0, 1]
native: true
nonKeyInputColumns: [2]
orderExpressions: [col 0:string]
outputColumns: [3, 0, 1, 2]
outputTypes: [double, string, string, double]
partitionExpressions: [col 0:string, col 1:string]
streamingColumns: []
Statistics: Num rows: 26 Data size: 12428 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col2 (type: string), avg_window_0 (type: double)
outputColumnNames: _col0, _col1
Select Vectorization:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [0, 3]
Statistics: Num rows: 26 Data size: 2756 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
Statistics: Num rows: 26 Data size: 2756 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
Fetch Operator
limit: -1
Processor Tree:
ListSink
PREHOOK: query: select p_mfgr, avg(p_retailprice) over(partition by p_mfgr, p_type order by p_mfgr) from part
PREHOOK: type: QUERY
PREHOOK: Input: default@part
#### A masked pattern was here ####
POSTHOOK: query: select p_mfgr, avg(p_retailprice) over(partition by p_mfgr, p_type order by p_mfgr) from part
POSTHOOK: type: QUERY
POSTHOOK: Input: default@part
#### A masked pattern was here ####
p_mfgr avg_window_0
Manufacturer#1 1753.76
Manufacturer#1 1632.66
Manufacturer#1 1602.59
Manufacturer#1 1414.42
Manufacturer#2 1800.7
Manufacturer#2 1690.68
Manufacturer#2 2031.98
Manufacturer#2 1701.6
Manufacturer#3 1410.39
Manufacturer#3 1671.68
Manufacturer#3 1190.27
Manufacturer#3 1337.29
Manufacturer#4 1375.42
Manufacturer#5 1788.73
Manufacturer#1 1173.15
Manufacturer#1 1173.15
Manufacturer#2 1698.66
Manufacturer#3 1922.98
Manufacturer#4 1844.92
Manufacturer#4 1620.67
Manufacturer#4 1206.26
Manufacturer#4 1290.35
Manufacturer#5 1018.1
Manufacturer#5 1464.48
Manufacturer#5 1789.69
Manufacturer#5 1611.66
PREHOOK: query: explain vectorization detail
select p_mfgr, avg(p_retailprice) over(partition by p_mfgr order by p_type,p_mfgr rows between unbounded preceding and current row) from part
PREHOOK: type: QUERY
PREHOOK: Input: default@part
#### A masked pattern was here ####
POSTHOOK: query: explain vectorization detail
select p_mfgr, avg(p_retailprice) over(partition by p_mfgr order by p_type,p_mfgr rows between unbounded preceding and current row) from part
POSTHOOK: type: QUERY
POSTHOOK: Input: default@part
#### A masked pattern was here ####
Explain
PLAN VECTORIZATION:
enabled: true
enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
STAGE PLANS:
Stage: Stage-1
Tez
#### A masked pattern was here ####
Edges:
Reducer 2 <- Map 1 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
TableScan
alias: part
Statistics: Num rows: 26 Data size: 5460 Basic stats: COMPLETE Column stats: COMPLETE
TableScan Vectorization:
native: true
vectorizationSchemaColumns: [0:p_partkey:int, 1:p_name:string, 2:p_mfgr:string, 3:p_brand:string, 4:p_type:string, 5:p_size:int, 6:p_container:string, 7:p_retailprice:double, 8:p_comment:string, 9:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
Reduce Output Operator
key expressions: p_mfgr (type: string), p_type (type: string)
null sort order: zz
sort order: ++
Map-reduce partition columns: p_mfgr (type: string)
Reduce Sink Vectorization:
className: VectorReduceSinkObjectHashOperator
keyColumns: 2:string, 4:string
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
partitionColumns: 2:string
valueColumns: 7:double
Statistics: Num rows: 26 Data size: 5460 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: p_retailprice (type: double)
Execution mode: vectorized, llap
LLAP IO: all inputs
Map Vectorization:
enabled: true
enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
inputFormatFeatureSupport: [DECIMAL_64]
featureSupportInUse: [DECIMAL_64]
inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
allNative: true
usesVectorUDFAdaptor: false
vectorized: true
rowBatchContext:
dataColumnCount: 9
includeColumns: [2, 4, 7]
dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string
partitionColumnCount: 0
scratchColumnTypeNames: []
Reducer 2
Execution mode: vectorized, llap
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
reduceColumnNullOrder: zz
reduceColumnSortOrder: ++
allNative: false
usesVectorUDFAdaptor: false
vectorized: true
rowBatchContext:
dataColumnCount: 3
dataColumns: KEY.reducesinkkey0:string, KEY.reducesinkkey1:string, VALUE._col5:double
partitionColumnCount: 0
scratchColumnTypeNames: [double]
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), VALUE._col5 (type: double)
outputColumnNames: _col2, _col4, _col7
Select Vectorization:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [0, 1, 2]
Statistics: Num rows: 26 Data size: 12428 Basic stats: COMPLETE Column stats: COMPLETE
PTF Operator
Function definitions:
Input definition
input alias: ptf_0
output shape: _col2: string, _col4: string, _col7: double
type: WINDOWING
Windowing table definition
input alias: ptf_1
name: windowingtablefunction
order by: _col4 ASC NULLS LAST, _col2 ASC NULLS LAST
partition by: _col2
raw input shape:
window functions:
window function definition
alias: avg_window_0
arguments: _col7
name: avg
window function: GenericUDAFAverageEvaluatorDouble
window frame: ROWS PRECEDING(MAX)~CURRENT
PTF Vectorization:
className: VectorPTFOperator
evaluatorClasses: [VectorPTFEvaluatorStreamingDoubleAvg]
functionInputExpressions: [col 2:double]
functionNames: [avg]
keyInputColumns: [0, 1]
native: true
nonKeyInputColumns: [2]
orderExpressions: [col 1:string, col 0:string]
outputColumns: [3, 0, 1, 2]
outputTypes: [double, string, string, double]
partitionExpressions: [col 0:string]
streamingColumns: [3]
Statistics: Num rows: 26 Data size: 12428 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col2 (type: string), avg_window_0 (type: double)
outputColumnNames: _col0, _col1
Select Vectorization:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [0, 3]
Statistics: Num rows: 26 Data size: 2756 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
Statistics: Num rows: 26 Data size: 2756 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
Fetch Operator
limit: -1
Processor Tree:
ListSink
PREHOOK: query: select p_mfgr, avg(p_retailprice) over(partition by p_mfgr order by p_type,p_mfgr rows between unbounded preceding and current row) from part
PREHOOK: type: QUERY
PREHOOK: Input: default@part
#### A masked pattern was here ####
POSTHOOK: query: select p_mfgr, avg(p_retailprice) over(partition by p_mfgr order by p_type,p_mfgr rows between unbounded preceding and current row) from part
POSTHOOK: type: QUERY
POSTHOOK: Input: default@part
#### A masked pattern was here ####
p_mfgr avg_window_0
Manufacturer#1 1753.76
Manufacturer#1 1693.21
Manufacturer#1 1663.0033333333333
Manufacturer#1 1540.54
Manufacturer#1 1467.062
Manufacturer#1 1458.2883333333332
Manufacturer#2 1800.7
Manufacturer#2 1745.69
Manufacturer#2 1841.1200000000001
Manufacturer#2 1805.505
Manufacturer#2 1784.7240000000002
Manufacturer#3 1922.98
Manufacturer#3 1666.685
Manufacturer#3 1668.3500000000001
Manufacturer#3 1548.83
Manufacturer#3 1506.522
Manufacturer#4 1844.92
Manufacturer#4 1610.17
Manufacturer#4 1613.67
Manufacturer#4 1511.8175
Manufacturer#4 1467.5240000000001
Manufacturer#5 1018.1
Manufacturer#5 1241.29
Manufacturer#5 1424.0900000000001
Manufacturer#5 1515.25
Manufacturer#5 1534.532
PREHOOK: query: create table t1_n23 (a1 int, b1 string)
PREHOOK: type: CREATETABLE
PREHOOK: Output: database:default
PREHOOK: Output: default@t1_n23
POSTHOOK: query: create table t1_n23 (a1 int, b1 string)
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: database:default
POSTHOOK: Output: default@t1_n23
PREHOOK: query: create table t2_n15 (a1 int, b1 string)
PREHOOK: type: CREATETABLE
PREHOOK: Output: database:default
PREHOOK: Output: default@t2_n15
POSTHOOK: query: create table t2_n15 (a1 int, b1 string)
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: database:default
POSTHOOK: Output: default@t2_n15
PREHOOK: query: explain vectorization detail
from (select sum(i) over (partition by ts order by i), s from over10k_n3) tt insert overwrite table t1_n23 select * insert overwrite table t2_n15 select *
PREHOOK: type: QUERY
PREHOOK: Input: default@over10k_n3
PREHOOK: Output: default@t1_n23
PREHOOK: Output: default@t2_n15
POSTHOOK: query: explain vectorization detail
from (select sum(i) over (partition by ts order by i), s from over10k_n3) tt insert overwrite table t1_n23 select * insert overwrite table t2_n15 select *
POSTHOOK: type: QUERY
POSTHOOK: Input: default@over10k_n3
POSTHOOK: Output: default@t1_n23
POSTHOOK: Output: default@t2_n15
Explain
PLAN VECTORIZATION:
enabled: true
enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
STAGE DEPENDENCIES:
Stage-2 is a root stage
Stage-3 depends on stages: Stage-2
Stage-0 depends on stages: Stage-3
Stage-4 depends on stages: Stage-0
Stage-1 depends on stages: Stage-3
Stage-5 depends on stages: Stage-1
STAGE PLANS:
Stage: Stage-2
Tez
#### A masked pattern was here ####
Edges:
Reducer 2 <- Map 1 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
TableScan
alias: over10k_n3
Statistics: Num rows: 1 Data size: 228 Basic stats: COMPLETE Column stats: NONE
TableScan Vectorization:
native: true
vectorizationSchemaColumns: [0:t:tinyint, 1:si:smallint, 2:i:int, 3:b:bigint, 4:f:float, 5:d:double, 6:bo:boolean, 7:s:string, 8:ts:timestamp, 9:dec:decimal(4,2)/DECIMAL_64, 10:bin:binary, 11:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
Reduce Output Operator
key expressions: ts (type: timestamp), i (type: int)
null sort order: az
sort order: ++
Map-reduce partition columns: ts (type: timestamp)
Reduce Sink Vectorization:
className: VectorReduceSinkObjectHashOperator
keyColumns: 8:timestamp, 2:int
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
partitionColumns: 8:timestamp
valueColumns: 7:string
Statistics: Num rows: 1 Data size: 228 Basic stats: COMPLETE Column stats: NONE
value expressions: s (type: string)
Execution mode: vectorized, llap
LLAP IO: all inputs
Map Vectorization:
enabled: true
enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
inputFormatFeatureSupport: [DECIMAL_64]
featureSupportInUse: [DECIMAL_64]
inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
allNative: true
usesVectorUDFAdaptor: false
vectorized: true
rowBatchContext:
dataColumnCount: 11
includeColumns: [2, 7, 8]
dataColumns: t:tinyint, si:smallint, i:int, b:bigint, f:float, d:double, bo:boolean, s:string, ts:timestamp, dec:decimal(4,2)/DECIMAL_64, bin:binary
partitionColumnCount: 0
scratchColumnTypeNames: []
Reducer 2
Execution mode: vectorized, llap
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
reduceColumnNullOrder: az
reduceColumnSortOrder: ++
allNative: false
usesVectorUDFAdaptor: false
vectorized: true
rowBatchContext:
dataColumnCount: 3
dataColumns: KEY.reducesinkkey0:timestamp, KEY.reducesinkkey1:int, VALUE._col6:string
partitionColumnCount: 0
scratchColumnTypeNames: [bigint]
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey1 (type: int), VALUE._col6 (type: string), KEY.reducesinkkey0 (type: timestamp)
outputColumnNames: _col2, _col7, _col8
Select Vectorization:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [1, 2, 0]
Statistics: Num rows: 1 Data size: 228 Basic stats: COMPLETE Column stats: NONE
PTF Operator
Function definitions:
Input definition
input alias: ptf_0
output shape: _col2: int, _col7: string, _col8: timestamp
type: WINDOWING
Windowing table definition
input alias: ptf_1
name: windowingtablefunction
order by: _col2 ASC NULLS LAST
partition by: _col8
raw input shape:
window functions:
window function definition
alias: sum_window_0
arguments: _col2
name: sum
window function: GenericUDAFSumLong
window frame: RANGE PRECEDING(MAX)~CURRENT
PTF Vectorization:
className: VectorPTFOperator
evaluatorClasses: [VectorPTFEvaluatorLongSum]
functionInputExpressions: [col 1:int]
functionNames: [sum]
keyInputColumns: [1, 0]
native: true
nonKeyInputColumns: [2]
orderExpressions: [col 1:int]
outputColumns: [3, 1, 2, 0]
outputTypes: [bigint, int, string, timestamp]
partitionExpressions: [col 0:timestamp]
streamingColumns: []
Statistics: Num rows: 1 Data size: 228 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: sum_window_0 (type: bigint), _col7 (type: string)
outputColumnNames: _col0, _col1
Select Vectorization:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [3, 2]
Statistics: Num rows: 1 Data size: 228 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: UDFToInteger(_col0) (type: int), _col1 (type: string)
outputColumnNames: _col0, _col1
Select Vectorization:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [3, 2]
Statistics: Num rows: 1 Data size: 228 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
Statistics: Num rows: 1 Data size: 228 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.t1_n23
Select Operator
expressions: UDFToInteger(_col0) (type: int), _col1 (type: string)
outputColumnNames: _col0, _col1
Select Vectorization:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [3, 2]
Statistics: Num rows: 1 Data size: 228 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
Statistics: Num rows: 1 Data size: 228 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.t2_n15
Stage: Stage-3
Dependency Collection
Stage: Stage-0
Move Operator
tables:
replace: true
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.t1_n23
Stage: Stage-4
Stats Work
Basic Stats Work:
Stage: Stage-1
Move Operator
tables:
replace: true
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.t2_n15
Stage: Stage-5
Stats Work
Basic Stats Work:
PREHOOK: query: from (select sum(i) over (partition by ts order by i), s from over10k_n3) tt insert overwrite table t1_n23 select * insert overwrite table t2_n15 select *
PREHOOK: type: QUERY
PREHOOK: Input: default@over10k_n3
PREHOOK: Output: default@t1_n23
PREHOOK: Output: default@t2_n15
POSTHOOK: query: from (select sum(i) over (partition by ts order by i), s from over10k_n3) tt insert overwrite table t1_n23 select * insert overwrite table t2_n15 select *
POSTHOOK: type: QUERY
POSTHOOK: Input: default@over10k_n3
POSTHOOK: Output: default@t1_n23
POSTHOOK: Output: default@t2_n15
POSTHOOK: Lineage: t1_n23.a1 SCRIPT [(over10k_n3)over10k_n3.FieldSchema(name:t, type:tinyint, comment:null), (over10k_n3)over10k_n3.FieldSchema(name:si, type:smallint, comment:null), (over10k_n3)over10k_n3.FieldSchema(name:i, type:int, comment:null), (over10k_n3)over10k_n3.FieldSchema(name:b, type:bigint, comment:null), (over10k_n3)over10k_n3.FieldSchema(name:f, type:float, comment:null), (over10k_n3)over10k_n3.FieldSchema(name:d, type:double, comment:null), (over10k_n3)over10k_n3.FieldSchema(name:bo, type:boolean, comment:null), (over10k_n3)over10k_n3.FieldSchema(name:s, type:string, comment:null), (over10k_n3)over10k_n3.FieldSchema(name:ts, type:timestamp, comment:null), (over10k_n3)over10k_n3.FieldSchema(name:dec, type:decimal(4,2), comment:null), (over10k_n3)over10k_n3.FieldSchema(name:bin, type:binary, comment:null), ]
POSTHOOK: Lineage: t1_n23.b1 SIMPLE [(over10k_n3)over10k_n3.FieldSchema(name:s, type:string, comment:null), ]
POSTHOOK: Lineage: t2_n15.a1 SCRIPT [(over10k_n3)over10k_n3.FieldSchema(name:t, type:tinyint, comment:null), (over10k_n3)over10k_n3.FieldSchema(name:si, type:smallint, comment:null), (over10k_n3)over10k_n3.FieldSchema(name:i, type:int, comment:null), (over10k_n3)over10k_n3.FieldSchema(name:b, type:bigint, comment:null), (over10k_n3)over10k_n3.FieldSchema(name:f, type:float, comment:null), (over10k_n3)over10k_n3.FieldSchema(name:d, type:double, comment:null), (over10k_n3)over10k_n3.FieldSchema(name:bo, type:boolean, comment:null), (over10k_n3)over10k_n3.FieldSchema(name:s, type:string, comment:null), (over10k_n3)over10k_n3.FieldSchema(name:ts, type:timestamp, comment:null), (over10k_n3)over10k_n3.FieldSchema(name:dec, type:decimal(4,2), comment:null), (over10k_n3)over10k_n3.FieldSchema(name:bin, type:binary, comment:null), ]
POSTHOOK: Lineage: t2_n15.b1 SIMPLE [(over10k_n3)over10k_n3.FieldSchema(name:s, type:string, comment:null), ]
_col0 _col1
PREHOOK: query: select * from t1_n23 limit 3
PREHOOK: type: QUERY
PREHOOK: Input: default@t1_n23
#### A masked pattern was here ####
POSTHOOK: query: select * from t1_n23 limit 3
POSTHOOK: type: QUERY
POSTHOOK: Input: default@t1_n23
#### A masked pattern was here ####
t1_n23.a1 t1_n23.b1
65542 rachel thompson
131088 oscar brown
262258 wendy steinbeck
PREHOOK: query: select * from t2_n15 limit 3
PREHOOK: type: QUERY
PREHOOK: Input: default@t2_n15
#### A masked pattern was here ####
POSTHOOK: query: select * from t2_n15 limit 3
POSTHOOK: type: QUERY
POSTHOOK: Input: default@t2_n15
#### A masked pattern was here ####
t2_n15.a1 t2_n15.b1
65542 rachel thompson
131088 oscar brown
262258 wendy steinbeck
PREHOOK: query: explain vectorization detail
select p_mfgr, p_retailprice, p_size,
round(sum(p_retailprice) over w1 , 2) + 50.0 = round(sum(lag(p_retailprice,1,50.0)) over w1 + (last_value(p_retailprice) over w1),2)
from part
window w1 as (distribute by p_mfgr sort by p_retailprice)
limit 11
PREHOOK: type: QUERY
PREHOOK: Input: default@part
#### A masked pattern was here ####
POSTHOOK: query: explain vectorization detail
select p_mfgr, p_retailprice, p_size,
round(sum(p_retailprice) over w1 , 2) + 50.0 = round(sum(lag(p_retailprice,1,50.0)) over w1 + (last_value(p_retailprice) over w1),2)
from part
window w1 as (distribute by p_mfgr sort by p_retailprice)
limit 11
POSTHOOK: type: QUERY
POSTHOOK: Input: default@part
#### A masked pattern was here ####
Explain
PLAN VECTORIZATION:
enabled: true
enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
STAGE PLANS:
Stage: Stage-1
Tez
#### A masked pattern was here ####
Edges:
Reducer 2 <- Map 1 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
TableScan
alias: part
Statistics: Num rows: 26 Data size: 2860 Basic stats: COMPLETE Column stats: COMPLETE
TableScan Vectorization:
native: true
vectorizationSchemaColumns: [0:p_partkey:int, 1:p_name:string, 2:p_mfgr:string, 3:p_brand:string, 4:p_type:string, 5:p_size:int, 6:p_container:string, 7:p_retailprice:double, 8:p_comment:string, 9:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
Reduce Output Operator
key expressions: p_mfgr (type: string), p_retailprice (type: double)
null sort order: az
sort order: ++
Map-reduce partition columns: p_mfgr (type: string)
Reduce Sink Vectorization:
className: VectorReduceSinkObjectHashOperator
keyColumns: 2:string, 7:double
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
partitionColumns: 2:string
valueColumns: 5:int
Statistics: Num rows: 26 Data size: 2860 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: p_size (type: int)
Execution mode: vectorized, llap
LLAP IO: all inputs
Map Vectorization:
enabled: true
enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
inputFormatFeatureSupport: [DECIMAL_64]
featureSupportInUse: [DECIMAL_64]
inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
allNative: true
usesVectorUDFAdaptor: false
vectorized: true
rowBatchContext:
dataColumnCount: 9
includeColumns: [2, 5, 7]
dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string
partitionColumnCount: 0
scratchColumnTypeNames: []
Reducer 2
Execution mode: llap
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
notVectorizedReason: PTF operator: lead and lag function not supported in argument expression of aggregation function sum
vectorized: false
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey0 (type: string), VALUE._col4 (type: int), KEY.reducesinkkey1 (type: double)
outputColumnNames: _col2, _col5, _col7
Statistics: Num rows: 26 Data size: 9828 Basic stats: COMPLETE Column stats: COMPLETE
PTF Operator
Function definitions:
Input definition
input alias: ptf_0
output shape: _col2: string, _col5: int, _col7: double
type: WINDOWING
Windowing table definition
input alias: ptf_1
name: windowingtablefunction
order by: _col7 ASC NULLS LAST
partition by: _col2
raw input shape:
window functions:
window function definition
alias: sum_window_0
arguments: _col7
name: sum
window function: GenericUDAFSumDouble
window frame: RANGE PRECEDING(MAX)~CURRENT
window function definition
alias: sum_window_1
arguments: lag(...)
name: sum
window function: GenericUDAFSumDouble
window frame: RANGE PRECEDING(MAX)~CURRENT
window function definition
alias: last_value_window_2
arguments: _col7
name: last_value
window function: GenericUDAFLastValueEvaluator
window frame: RANGE PRECEDING(MAX)~CURRENT
Lead/Lag information: lag(...) (type: double)
Statistics: Num rows: 26 Data size: 9828 Basic stats: COMPLETE Column stats: COMPLETE
Limit
Number of rows: 11
Statistics: Num rows: 11 Data size: 4158 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col2 (type: string), _col7 (type: double), _col5 (type: int), ((round(sum_window_0, 2) + 50.0D) = round((sum_window_1 + last_value_window_2), 2)) (type: boolean)
outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 11 Data size: 1254 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
Statistics: Num rows: 11 Data size: 1254 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
Fetch Operator
limit: 11
Processor Tree:
ListSink
PREHOOK: query: select p_mfgr, p_retailprice, p_size,
round(sum(p_retailprice) over w1 , 2) + 50.0 = round(sum(lag(p_retailprice,1,50.0)) over w1 + (last_value(p_retailprice) over w1),2)
from part
window w1 as (distribute by p_mfgr sort by p_retailprice)
limit 11
PREHOOK: type: QUERY
PREHOOK: Input: default@part
#### A masked pattern was here ####
POSTHOOK: query: select p_mfgr, p_retailprice, p_size,
round(sum(p_retailprice) over w1 , 2) + 50.0 = round(sum(lag(p_retailprice,1,50.0)) over w1 + (last_value(p_retailprice) over w1),2)
from part
window w1 as (distribute by p_mfgr sort by p_retailprice)
limit 11
POSTHOOK: type: QUERY
POSTHOOK: Input: default@part
#### A masked pattern was here ####
p_mfgr p_retailprice p_size _c3
Manufacturer#1 1173.15 2 true
Manufacturer#1 1173.15 2 true
Manufacturer#1 1414.42 28 true
Manufacturer#1 1602.59 6 true
Manufacturer#1 1632.66 42 true
Manufacturer#1 1753.76 34 true
Manufacturer#2 1690.68 14 true
Manufacturer#2 1698.66 25 true
Manufacturer#2 1701.6 18 true
Manufacturer#2 1800.7 40 true
Manufacturer#2 2031.98 2 true
PREHOOK: query: select p_mfgr, p_retailprice, p_size,
round(sum(p_retailprice) over w1 , 2) = round(sum(lag(p_retailprice,1,0.0)) over w1 + last_value(p_retailprice) over w1 , 2),
max(p_retailprice) over w1 - min(p_retailprice) over w1 = last_value(p_retailprice) over w1 - first_value(p_retailprice) over w1
from part
window w1 as (distribute by p_mfgr sort by p_retailprice)
PREHOOK: type: QUERY
PREHOOK: Input: default@part
#### A masked pattern was here ####
POSTHOOK: query: select p_mfgr, p_retailprice, p_size,
round(sum(p_retailprice) over w1 , 2) = round(sum(lag(p_retailprice,1,0.0)) over w1 + last_value(p_retailprice) over w1 , 2),
max(p_retailprice) over w1 - min(p_retailprice) over w1 = last_value(p_retailprice) over w1 - first_value(p_retailprice) over w1
from part
window w1 as (distribute by p_mfgr sort by p_retailprice)
POSTHOOK: type: QUERY
POSTHOOK: Input: default@part
#### A masked pattern was here ####
p_mfgr p_retailprice p_size _c3 _c4
Manufacturer#1 1173.15 2 true true
Manufacturer#1 1173.15 2 true true
Manufacturer#1 1414.42 28 true true
Manufacturer#1 1602.59 6 true true
Manufacturer#1 1632.66 42 true true
Manufacturer#1 1753.76 34 true true
Manufacturer#2 1690.68 14 true true
Manufacturer#2 1698.66 25 true true
Manufacturer#2 1701.6 18 true true
Manufacturer#2 1800.7 40 true true
Manufacturer#2 2031.98 2 true true
Manufacturer#3 1190.27 14 true true
Manufacturer#3 1337.29 45 true true
Manufacturer#3 1410.39 19 true true
Manufacturer#3 1671.68 17 true true
Manufacturer#3 1922.98 1 true true
Manufacturer#4 1206.26 27 true true
Manufacturer#4 1290.35 12 true true
Manufacturer#4 1375.42 39 true true
Manufacturer#4 1620.67 10 true true
Manufacturer#4 1844.92 7 true true
Manufacturer#5 1018.1 46 true true
Manufacturer#5 1464.48 23 true true
Manufacturer#5 1611.66 6 true true
Manufacturer#5 1788.73 2 true true
Manufacturer#5 1789.69 31 true true
PREHOOK: query: select p_mfgr, p_retailprice, p_size,
rank() over (distribute by p_mfgr sort by p_retailprice) as r,
sum(p_retailprice) over (distribute by p_mfgr sort by p_retailprice rows between unbounded preceding and current row) as s2,
sum(p_retailprice) over (distribute by p_mfgr sort by p_retailprice rows between unbounded preceding and current row) -5 as s1
from part
PREHOOK: type: QUERY
PREHOOK: Input: default@part
#### A masked pattern was here ####
POSTHOOK: query: select p_mfgr, p_retailprice, p_size,
rank() over (distribute by p_mfgr sort by p_retailprice) as r,
sum(p_retailprice) over (distribute by p_mfgr sort by p_retailprice rows between unbounded preceding and current row) as s2,
sum(p_retailprice) over (distribute by p_mfgr sort by p_retailprice rows between unbounded preceding and current row) -5 as s1
from part
POSTHOOK: type: QUERY
POSTHOOK: Input: default@part
#### A masked pattern was here ####
p_mfgr p_retailprice p_size r s2 s1
Manufacturer#1 1173.15 2 1 1173.15 1168.15
Manufacturer#1 1173.15 2 1 2346.3 2341.3
Manufacturer#1 1414.42 28 3 3760.7200000000003 3755.7200000000003
Manufacturer#1 1602.59 6 4 5363.31 5358.31
Manufacturer#1 1632.66 42 5 6995.97 6990.97
Manufacturer#1 1753.76 34 6 8749.73 8744.73
Manufacturer#2 1690.68 14 1 1690.68 1685.68
Manufacturer#2 1698.66 25 2 3389.34 3384.34
Manufacturer#2 1701.6 18 3 5090.9400000000005 5085.9400000000005
Manufacturer#2 1800.7 40 4 6891.64 6886.64
Manufacturer#2 2031.98 2 5 8923.62 8918.62
Manufacturer#3 1190.27 14 1 1190.27 1185.27
Manufacturer#3 1337.29 45 2 2527.56 2522.56
Manufacturer#3 1410.39 19 3 3937.95 3932.95
Manufacturer#3 1671.68 17 4 5609.63 5604.63
Manufacturer#3 1922.98 1 5 7532.610000000001 7527.610000000001
Manufacturer#4 1206.26 27 1 1206.26 1201.26
Manufacturer#4 1290.35 12 2 2496.6099999999997 2491.6099999999997
Manufacturer#4 1375.42 39 3 3872.0299999999997 3867.0299999999997
Manufacturer#4 1620.67 10 4 5492.7 5487.7
Manufacturer#4 1844.92 7 5 7337.62 7332.62
Manufacturer#5 1018.1 46 1 1018.1 1013.1
Manufacturer#5 1464.48 23 2 2482.58 2477.58
Manufacturer#5 1611.66 6 3 4094.24 4089.24
Manufacturer#5 1788.73 2 4 5882.969999999999 5877.969999999999
Manufacturer#5 1789.69 31 5 7672.66 7667.66
PREHOOK: query: select p_mfgr, avg(p_retailprice) over(partition by p_mfgr, p_type order by p_mfgr) from part
PREHOOK: type: QUERY
PREHOOK: Input: default@part
#### A masked pattern was here ####
POSTHOOK: query: select p_mfgr, avg(p_retailprice) over(partition by p_mfgr, p_type order by p_mfgr) from part
POSTHOOK: type: QUERY
POSTHOOK: Input: default@part
#### A masked pattern was here ####
p_mfgr avg_window_0
Manufacturer#1 1753.76
Manufacturer#1 1632.66
Manufacturer#1 1602.59
Manufacturer#1 1414.42
Manufacturer#2 1800.7
Manufacturer#2 1690.68
Manufacturer#2 2031.98
Manufacturer#2 1701.6
Manufacturer#3 1410.39
Manufacturer#3 1671.68
Manufacturer#3 1190.27
Manufacturer#3 1337.29
Manufacturer#4 1375.42
Manufacturer#5 1788.73
Manufacturer#1 1173.15
Manufacturer#1 1173.15
Manufacturer#2 1698.66
Manufacturer#3 1922.98
Manufacturer#4 1844.92
Manufacturer#4 1620.67
Manufacturer#4 1206.26
Manufacturer#4 1290.35
Manufacturer#5 1018.1
Manufacturer#5 1464.48
Manufacturer#5 1789.69
Manufacturer#5 1611.66
PREHOOK: query: select p_mfgr, avg(p_retailprice) over(partition by p_mfgr order by p_type,p_mfgr rows between unbounded preceding and current row) from part
PREHOOK: type: QUERY
PREHOOK: Input: default@part
#### A masked pattern was here ####
POSTHOOK: query: select p_mfgr, avg(p_retailprice) over(partition by p_mfgr order by p_type,p_mfgr rows between unbounded preceding and current row) from part
POSTHOOK: type: QUERY
POSTHOOK: Input: default@part
#### A masked pattern was here ####
p_mfgr avg_window_0
Manufacturer#1 1753.76
Manufacturer#1 1693.21
Manufacturer#1 1663.0033333333333
Manufacturer#1 1540.54
Manufacturer#1 1467.062
Manufacturer#1 1458.2883333333332
Manufacturer#2 1800.7
Manufacturer#2 1745.69
Manufacturer#2 1841.1200000000001
Manufacturer#2 1805.505
Manufacturer#2 1784.7240000000002
Manufacturer#3 1922.98
Manufacturer#3 1666.685
Manufacturer#3 1668.3500000000001
Manufacturer#3 1548.83
Manufacturer#3 1506.522
Manufacturer#4 1844.92
Manufacturer#4 1610.17
Manufacturer#4 1613.67
Manufacturer#4 1511.8175
Manufacturer#4 1467.5240000000001
Manufacturer#5 1018.1
Manufacturer#5 1241.29
Manufacturer#5 1424.0900000000001
Manufacturer#5 1515.25
Manufacturer#5 1534.532
PREHOOK: query: from (select sum(i) over (partition by ts order by i), s from over10k_n3) tt insert overwrite table t1_n23 select * insert overwrite table t2_n15 select *
PREHOOK: type: QUERY
PREHOOK: Input: default@over10k_n3
PREHOOK: Output: default@t1_n23
PREHOOK: Output: default@t2_n15
POSTHOOK: query: from (select sum(i) over (partition by ts order by i), s from over10k_n3) tt insert overwrite table t1_n23 select * insert overwrite table t2_n15 select *
POSTHOOK: type: QUERY
POSTHOOK: Input: default@over10k_n3
POSTHOOK: Output: default@t1_n23
POSTHOOK: Output: default@t2_n15
POSTHOOK: Lineage: t1_n23.a1 SCRIPT [(over10k_n3)over10k_n3.FieldSchema(name:t, type:tinyint, comment:null), (over10k_n3)over10k_n3.FieldSchema(name:si, type:smallint, comment:null), (over10k_n3)over10k_n3.FieldSchema(name:i, type:int, comment:null), (over10k_n3)over10k_n3.FieldSchema(name:b, type:bigint, comment:null), (over10k_n3)over10k_n3.FieldSchema(name:f, type:float, comment:null), (over10k_n3)over10k_n3.FieldSchema(name:d, type:double, comment:null), (over10k_n3)over10k_n3.FieldSchema(name:bo, type:boolean, comment:null), (over10k_n3)over10k_n3.FieldSchema(name:s, type:string, comment:null), (over10k_n3)over10k_n3.FieldSchema(name:ts, type:timestamp, comment:null), (over10k_n3)over10k_n3.FieldSchema(name:dec, type:decimal(4,2), comment:null), (over10k_n3)over10k_n3.FieldSchema(name:bin, type:binary, comment:null), ]
POSTHOOK: Lineage: t1_n23.b1 SIMPLE [(over10k_n3)over10k_n3.FieldSchema(name:s, type:string, comment:null), ]
POSTHOOK: Lineage: t2_n15.a1 SCRIPT [(over10k_n3)over10k_n3.FieldSchema(name:t, type:tinyint, comment:null), (over10k_n3)over10k_n3.FieldSchema(name:si, type:smallint, comment:null), (over10k_n3)over10k_n3.FieldSchema(name:i, type:int, comment:null), (over10k_n3)over10k_n3.FieldSchema(name:b, type:bigint, comment:null), (over10k_n3)over10k_n3.FieldSchema(name:f, type:float, comment:null), (over10k_n3)over10k_n3.FieldSchema(name:d, type:double, comment:null), (over10k_n3)over10k_n3.FieldSchema(name:bo, type:boolean, comment:null), (over10k_n3)over10k_n3.FieldSchema(name:s, type:string, comment:null), (over10k_n3)over10k_n3.FieldSchema(name:ts, type:timestamp, comment:null), (over10k_n3)over10k_n3.FieldSchema(name:dec, type:decimal(4,2), comment:null), (over10k_n3)over10k_n3.FieldSchema(name:bin, type:binary, comment:null), ]
POSTHOOK: Lineage: t2_n15.b1 SIMPLE [(over10k_n3)over10k_n3.FieldSchema(name:s, type:string, comment:null), ]
_col0 _col1
PREHOOK: query: select * from t1_n23 limit 3
PREHOOK: type: QUERY
PREHOOK: Input: default@t1_n23
#### A masked pattern was here ####
POSTHOOK: query: select * from t1_n23 limit 3
POSTHOOK: type: QUERY
POSTHOOK: Input: default@t1_n23
#### A masked pattern was here ####
t1_n23.a1 t1_n23.b1
65542 rachel thompson
131088 oscar brown
262258 wendy steinbeck
PREHOOK: query: select * from t2_n15 limit 3
PREHOOK: type: QUERY
PREHOOK: Input: default@t2_n15
#### A masked pattern was here ####
POSTHOOK: query: select * from t2_n15 limit 3
POSTHOOK: type: QUERY
POSTHOOK: Input: default@t2_n15
#### A masked pattern was here ####
t2_n15.a1 t2_n15.b1
65542 rachel thompson
131088 oscar brown
262258 wendy steinbeck
PREHOOK: query: select p_mfgr, p_retailprice, p_size,
round(sum(p_retailprice) over w1 , 2) + 50.0 = round(sum(lag(p_retailprice,1,50.0)) over w1 + (last_value(p_retailprice) over w1),2)
from part
window w1 as (distribute by p_mfgr sort by p_retailprice)
limit 11
PREHOOK: type: QUERY
PREHOOK: Input: default@part
#### A masked pattern was here ####
POSTHOOK: query: select p_mfgr, p_retailprice, p_size,
round(sum(p_retailprice) over w1 , 2) + 50.0 = round(sum(lag(p_retailprice,1,50.0)) over w1 + (last_value(p_retailprice) over w1),2)
from part
window w1 as (distribute by p_mfgr sort by p_retailprice)
limit 11
POSTHOOK: type: QUERY
POSTHOOK: Input: default@part
#### A masked pattern was here ####
p_mfgr p_retailprice p_size _c3
Manufacturer#1 1173.15 2 true
Manufacturer#1 1173.15 2 true
Manufacturer#1 1414.42 28 true
Manufacturer#1 1602.59 6 true
Manufacturer#1 1632.66 42 true
Manufacturer#1 1753.76 34 true
Manufacturer#2 1690.68 14 true
Manufacturer#2 1698.66 25 true
Manufacturer#2 1701.6 18 true
Manufacturer#2 1800.7 40 true
Manufacturer#2 2031.98 2 true