blob: 25448dda679d560e996fc4a2ae42acec77a2b8d3 [file] [log] [blame]
PREHOOK: query: drop table ss_n0
PREHOOK: type: DROPTABLE
POSTHOOK: query: drop table ss_n0
POSTHOOK: type: DROPTABLE
PREHOOK: query: drop table ss_orc
PREHOOK: type: DROPTABLE
POSTHOOK: query: drop table ss_orc
POSTHOOK: type: DROPTABLE
PREHOOK: query: drop table ss_part
PREHOOK: type: DROPTABLE
POSTHOOK: query: drop table ss_part
POSTHOOK: type: DROPTABLE
PREHOOK: query: drop table ss_part_orc
PREHOOK: type: DROPTABLE
POSTHOOK: query: drop table ss_part_orc
POSTHOOK: type: DROPTABLE
PREHOOK: query: create table ss_n0 (
ss_sold_date_sk int,
ss_net_paid_inc_tax float,
ss_net_profit float)
PREHOOK: type: CREATETABLE
PREHOOK: Output: database:default
PREHOOK: Output: default@ss_n0
POSTHOOK: query: create table ss_n0 (
ss_sold_date_sk int,
ss_net_paid_inc_tax float,
ss_net_profit float)
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: database:default
POSTHOOK: Output: default@ss_n0
PREHOOK: query: create table ss_part (
ss_net_paid_inc_tax float,
ss_net_profit float)
partitioned by (ss_sold_date_sk int)
PREHOOK: type: CREATETABLE
PREHOOK: Output: database:default
PREHOOK: Output: default@ss_part
POSTHOOK: query: create table ss_part (
ss_net_paid_inc_tax float,
ss_net_profit float)
partitioned by (ss_sold_date_sk int)
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: database:default
POSTHOOK: Output: default@ss_part
PREHOOK: query: load data local inpath '../../data/files/dynpart_test.txt' overwrite into table ss_n0
PREHOOK: type: LOAD
#### A masked pattern was here ####
PREHOOK: Output: default@ss_n0
POSTHOOK: query: load data local inpath '../../data/files/dynpart_test.txt' overwrite into table ss_n0
POSTHOOK: type: LOAD
#### A masked pattern was here ####
POSTHOOK: Output: default@ss_n0
PREHOOK: query: explain insert overwrite table ss_part partition (ss_sold_date_sk)
select ss_net_paid_inc_tax,
ss_net_profit,
ss_sold_date_sk
from ss_n0
where ss_sold_date_sk>=2452617 and ss_sold_date_sk<=2452638
group by ss_sold_date_sk,
ss_net_paid_inc_tax,
ss_net_profit
distribute by ss_sold_date_sk
PREHOOK: type: QUERY
PREHOOK: Input: default@ss_n0
PREHOOK: Output: default@ss_part
POSTHOOK: query: explain insert overwrite table ss_part partition (ss_sold_date_sk)
select ss_net_paid_inc_tax,
ss_net_profit,
ss_sold_date_sk
from ss_n0
where ss_sold_date_sk>=2452617 and ss_sold_date_sk<=2452638
group by ss_sold_date_sk,
ss_net_paid_inc_tax,
ss_net_profit
distribute by ss_sold_date_sk
POSTHOOK: type: QUERY
POSTHOOK: Input: default@ss_n0
POSTHOOK: Output: default@ss_part
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-2 depends on stages: Stage-1
Stage-0 depends on stages: Stage-2
Stage-3 depends on stages: Stage-0
STAGE PLANS:
Stage: Stage-1
Tez
#### A masked pattern was here ####
Edges:
Reducer 2 <- Map 1 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
TableScan
alias: ss_n0
filterExpr: ((ss_sold_date_sk >= 2452617) and (ss_sold_date_sk <= 2452638)) (type: boolean)
Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: ((ss_sold_date_sk >= 2452617) and (ss_sold_date_sk <= 2452638)) (type: boolean)
Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
Group By Operator
keys: ss_sold_date_sk (type: int), ss_net_paid_inc_tax (type: float), ss_net_profit (type: float)
minReductionHashAggr: 0.99
mode: hash
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int), _col1 (type: float), _col2 (type: float)
null sort order: zzz
sort order: +++
Map-reduce partition columns: _col0 (type: int)
Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
Execution mode: vectorized, llap
LLAP IO: all inputs
Reducer 2
Execution mode: llap
Reduce Operator Tree:
Group By Operator
keys: KEY._col0 (type: int), KEY._col1 (type: float), KEY._col2 (type: float)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col1 (type: float), _col2 (type: float), _col0 (type: int)
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col0 (type: float), _col1 (type: float), _col2 (type: int)
outputColumnNames: ss_net_paid_inc_tax, ss_net_profit, ss_sold_date_sk
Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: min(ss_net_paid_inc_tax), max(ss_net_paid_inc_tax), count(1), count(ss_net_paid_inc_tax), compute_bit_vector(ss_net_paid_inc_tax, 'hll'), min(ss_net_profit), max(ss_net_profit), count(ss_net_profit), compute_bit_vector(ss_net_profit, 'hll')
keys: ss_sold_date_sk (type: int)
mode: complete
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: 'DOUBLE' (type: string), UDFToDouble(_col1) (type: double), UDFToDouble(_col2) (type: double), (_col3 - _col4) (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'DOUBLE' (type: string), UDFToDouble(_col6) (type: double), UDFToDouble(_col7) (type: double), (_col3 - _col8) (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), _col0 (type: int)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Select Operator
expressions: _col0 (type: float), _col1 (type: float), _col2 (type: int)
outputColumnNames: _col0, _col1, _col2
File Output Operator
compressed: false
Dp Sort State: PARTITION_SORTED
Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.ss_part
Stage: Stage-2
Dependency Collection
Stage: Stage-0
Move Operator
tables:
partition:
ss_sold_date_sk
replace: true
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.ss_part
Stage: Stage-3
Stats Work
Basic Stats Work:
Column Stats Desc:
Columns: ss_net_paid_inc_tax, ss_net_profit
Column Types: float, float
Table: default.ss_part
PREHOOK: query: insert overwrite table ss_part partition (ss_sold_date_sk)
select ss_net_paid_inc_tax,
ss_net_profit,
ss_sold_date_sk
from ss_n0
where ss_sold_date_sk>=2452617 and ss_sold_date_sk<=2452638
group by ss_sold_date_sk,
ss_net_paid_inc_tax,
ss_net_profit
distribute by ss_sold_date_sk
PREHOOK: type: QUERY
PREHOOK: Input: default@ss_n0
PREHOOK: Output: default@ss_part
POSTHOOK: query: insert overwrite table ss_part partition (ss_sold_date_sk)
select ss_net_paid_inc_tax,
ss_net_profit,
ss_sold_date_sk
from ss_n0
where ss_sold_date_sk>=2452617 and ss_sold_date_sk<=2452638
group by ss_sold_date_sk,
ss_net_paid_inc_tax,
ss_net_profit
distribute by ss_sold_date_sk
POSTHOOK: type: QUERY
POSTHOOK: Input: default@ss_n0
POSTHOOK: Output: default@ss_part
POSTHOOK: Output: default@ss_part@ss_sold_date_sk=2452617
POSTHOOK: Output: default@ss_part@ss_sold_date_sk=2452638
POSTHOOK: Lineage: ss_part PARTITION(ss_sold_date_sk=2452617).ss_net_paid_inc_tax SIMPLE [(ss_n0)ss_n0.FieldSchema(name:ss_net_paid_inc_tax, type:float, comment:null), ]
POSTHOOK: Lineage: ss_part PARTITION(ss_sold_date_sk=2452617).ss_net_profit SIMPLE [(ss_n0)ss_n0.FieldSchema(name:ss_net_profit, type:float, comment:null), ]
POSTHOOK: Lineage: ss_part PARTITION(ss_sold_date_sk=2452638).ss_net_paid_inc_tax SIMPLE [(ss_n0)ss_n0.FieldSchema(name:ss_net_paid_inc_tax, type:float, comment:null), ]
POSTHOOK: Lineage: ss_part PARTITION(ss_sold_date_sk=2452638).ss_net_profit SIMPLE [(ss_n0)ss_n0.FieldSchema(name:ss_net_profit, type:float, comment:null), ]
PREHOOK: query: desc formatted ss_part partition(ss_sold_date_sk=2452617)
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@ss_part
POSTHOOK: query: desc formatted ss_part partition(ss_sold_date_sk=2452617)
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@ss_part
# col_name data_type comment
ss_net_paid_inc_tax float
ss_net_profit float
# Partition Information
# col_name data_type comment
ss_sold_date_sk int
# Detailed Partition Information
Partition Value: [2452617]
Database: default
Table: ss_part
#### A masked pattern was here ####
Partition Parameters:
COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"ss_net_paid_inc_tax\":\"true\",\"ss_net_profit\":\"true\"}}
numFiles 1
numRows 11
rawDataSize 151
totalSize 162
#### A masked pattern was here ####
# Storage Information
SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
InputFormat: org.apache.hadoop.mapred.TextInputFormat
OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
Compressed: No
Num Buckets: -1
Bucket Columns: []
Sort Columns: []
Storage Desc Params:
serialization.format 1
PREHOOK: query: select * from ss_part where ss_sold_date_sk=2452617
PREHOOK: type: QUERY
PREHOOK: Input: default@ss_part
PREHOOK: Input: default@ss_part@ss_sold_date_sk=2452617
#### A masked pattern was here ####
POSTHOOK: query: select * from ss_part where ss_sold_date_sk=2452617
POSTHOOK: type: QUERY
POSTHOOK: Input: default@ss_part
POSTHOOK: Input: default@ss_part@ss_sold_date_sk=2452617
#### A masked pattern was here ####
10022.63 3952.8 2452617
1765.07 -4648.8 2452617
2.1 -2026.3 2452617
2.99 -11.32 2452617
3423.95 -3164.07 2452617
5362.01 -600.28 2452617
552.96 -1363.84 2452617
565.92 196.48 2452617
7412.83 2071.68 2452617
85.8 25.61 2452617
879.07 -2185.76 2452617
PREHOOK: query: desc formatted ss_part partition(ss_sold_date_sk=2452638)
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@ss_part
POSTHOOK: query: desc formatted ss_part partition(ss_sold_date_sk=2452638)
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@ss_part
# col_name data_type comment
ss_net_paid_inc_tax float
ss_net_profit float
# Partition Information
# col_name data_type comment
ss_sold_date_sk int
# Detailed Partition Information
Partition Value: [2452638]
Database: default
Table: ss_part
#### A masked pattern was here ####
Partition Parameters:
COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"ss_net_paid_inc_tax\":\"true\",\"ss_net_profit\":\"true\"}}
numFiles 1
numRows 13
rawDataSize 186
totalSize 199
#### A masked pattern was here ####
# Storage Information
SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
InputFormat: org.apache.hadoop.mapred.TextInputFormat
OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
Compressed: No
Num Buckets: -1
Bucket Columns: []
Sort Columns: []
Storage Desc Params:
serialization.format 1
PREHOOK: query: select * from ss_part where ss_sold_date_sk=2452638
PREHOOK: type: QUERY
PREHOOK: Input: default@ss_part
PREHOOK: Input: default@ss_part@ss_sold_date_sk=2452638
#### A masked pattern was here ####
POSTHOOK: query: select * from ss_part where ss_sold_date_sk=2452638
POSTHOOK: type: QUERY
POSTHOOK: Input: default@ss_part
POSTHOOK: Input: default@ss_part@ss_sold_date_sk=2452638
#### A masked pattern was here ####
0.15 -241.22 2452638
10171.1 660.48 2452638
1327.08 57.97 2452638
1413.19 178.08 2452638
150.39 -162.12 2452638
1524.33 494.37 2452638
156.67 -4626.56 2452638
181.03 -207.24 2452638
1971.35 -488.25 2452638
267.01 -3266.36 2452638
317.87 -3775.38 2452638
4133.98 -775.72 2452638
4329.49 -4000.51 2452638
PREHOOK: query: explain insert overwrite table ss_part partition (ss_sold_date_sk)
select ss_net_paid_inc_tax,
ss_net_profit,
ss_sold_date_sk
from ss_n0
where ss_sold_date_sk>=2452617 and ss_sold_date_sk<=2452638
distribute by ss_sold_date_sk
PREHOOK: type: QUERY
PREHOOK: Input: default@ss_n0
PREHOOK: Output: default@ss_part
POSTHOOK: query: explain insert overwrite table ss_part partition (ss_sold_date_sk)
select ss_net_paid_inc_tax,
ss_net_profit,
ss_sold_date_sk
from ss_n0
where ss_sold_date_sk>=2452617 and ss_sold_date_sk<=2452638
distribute by ss_sold_date_sk
POSTHOOK: type: QUERY
POSTHOOK: Input: default@ss_n0
POSTHOOK: Output: default@ss_part
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-2 depends on stages: Stage-1
Stage-0 depends on stages: Stage-2
Stage-3 depends on stages: Stage-0
STAGE PLANS:
Stage: Stage-1
Tez
#### A masked pattern was here ####
Edges:
Reducer 2 <- Map 1 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
TableScan
alias: ss_n0
filterExpr: ((ss_sold_date_sk >= 2452617) and (ss_sold_date_sk <= 2452638)) (type: boolean)
Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: ((ss_sold_date_sk >= 2452617) and (ss_sold_date_sk <= 2452638)) (type: boolean)
Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ss_net_paid_inc_tax (type: float), ss_net_profit (type: float), ss_sold_date_sk (type: int)
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col2 (type: int)
null sort order: z
sort order: +
Map-reduce partition columns: _col2 (type: int)
Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
value expressions: _col0 (type: float), _col1 (type: float), _col2 (type: int)
Execution mode: vectorized, llap
LLAP IO: all inputs
Reducer 2
Execution mode: llap
Reduce Operator Tree:
Select Operator
expressions: VALUE._col0 (type: float), VALUE._col1 (type: float), VALUE._col2 (type: int)
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col0 (type: float), _col1 (type: float), _col2 (type: int)
outputColumnNames: ss_net_paid_inc_tax, ss_net_profit, ss_sold_date_sk
Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: min(ss_net_paid_inc_tax), max(ss_net_paid_inc_tax), count(1), count(ss_net_paid_inc_tax), compute_bit_vector(ss_net_paid_inc_tax, 'hll'), min(ss_net_profit), max(ss_net_profit), count(ss_net_profit), compute_bit_vector(ss_net_profit, 'hll')
keys: ss_sold_date_sk (type: int)
mode: complete
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: 'DOUBLE' (type: string), UDFToDouble(_col1) (type: double), UDFToDouble(_col2) (type: double), (_col3 - _col4) (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'DOUBLE' (type: string), UDFToDouble(_col6) (type: double), UDFToDouble(_col7) (type: double), (_col3 - _col8) (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), _col0 (type: int)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Select Operator
expressions: _col0 (type: float), _col1 (type: float), _col2 (type: int)
outputColumnNames: _col0, _col1, _col2
File Output Operator
compressed: false
Dp Sort State: PARTITION_SORTED
Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.ss_part
Stage: Stage-2
Dependency Collection
Stage: Stage-0
Move Operator
tables:
partition:
ss_sold_date_sk
replace: true
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.ss_part
Stage: Stage-3
Stats Work
Basic Stats Work:
Column Stats Desc:
Columns: ss_net_paid_inc_tax, ss_net_profit
Column Types: float, float
Table: default.ss_part
PREHOOK: query: insert overwrite table ss_part partition (ss_sold_date_sk)
select ss_net_paid_inc_tax,
ss_net_profit,
ss_sold_date_sk
from ss_n0
where ss_sold_date_sk>=2452617 and ss_sold_date_sk<=2452638
distribute by ss_sold_date_sk
PREHOOK: type: QUERY
PREHOOK: Input: default@ss_n0
PREHOOK: Output: default@ss_part
POSTHOOK: query: insert overwrite table ss_part partition (ss_sold_date_sk)
select ss_net_paid_inc_tax,
ss_net_profit,
ss_sold_date_sk
from ss_n0
where ss_sold_date_sk>=2452617 and ss_sold_date_sk<=2452638
distribute by ss_sold_date_sk
POSTHOOK: type: QUERY
POSTHOOK: Input: default@ss_n0
POSTHOOK: Output: default@ss_part
POSTHOOK: Output: default@ss_part@ss_sold_date_sk=2452617
POSTHOOK: Output: default@ss_part@ss_sold_date_sk=2452638
POSTHOOK: Lineage: ss_part PARTITION(ss_sold_date_sk=2452617).ss_net_paid_inc_tax SIMPLE [(ss_n0)ss_n0.FieldSchema(name:ss_net_paid_inc_tax, type:float, comment:null), ]
POSTHOOK: Lineage: ss_part PARTITION(ss_sold_date_sk=2452617).ss_net_profit SIMPLE [(ss_n0)ss_n0.FieldSchema(name:ss_net_profit, type:float, comment:null), ]
POSTHOOK: Lineage: ss_part PARTITION(ss_sold_date_sk=2452638).ss_net_paid_inc_tax SIMPLE [(ss_n0)ss_n0.FieldSchema(name:ss_net_paid_inc_tax, type:float, comment:null), ]
POSTHOOK: Lineage: ss_part PARTITION(ss_sold_date_sk=2452638).ss_net_profit SIMPLE [(ss_n0)ss_n0.FieldSchema(name:ss_net_profit, type:float, comment:null), ]
PREHOOK: query: desc formatted ss_part partition(ss_sold_date_sk=2452617)
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@ss_part
POSTHOOK: query: desc formatted ss_part partition(ss_sold_date_sk=2452617)
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@ss_part
# col_name data_type comment
ss_net_paid_inc_tax float
ss_net_profit float
# Partition Information
# col_name data_type comment
ss_sold_date_sk int
# Detailed Partition Information
Partition Value: [2452617]
Database: default
Table: ss_part
#### A masked pattern was here ####
Partition Parameters:
COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"ss_net_paid_inc_tax\":\"true\",\"ss_net_profit\":\"true\"}}
numFiles 1
numRows 11
rawDataSize 151
totalSize 162
#### A masked pattern was here ####
# Storage Information
SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
InputFormat: org.apache.hadoop.mapred.TextInputFormat
OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
Compressed: No
Num Buckets: -1
Bucket Columns: []
Sort Columns: []
Storage Desc Params:
serialization.format 1
PREHOOK: query: select * from ss_part where ss_sold_date_sk=2452617
PREHOOK: type: QUERY
PREHOOK: Input: default@ss_part
PREHOOK: Input: default@ss_part@ss_sold_date_sk=2452617
#### A masked pattern was here ####
POSTHOOK: query: select * from ss_part where ss_sold_date_sk=2452617
POSTHOOK: type: QUERY
POSTHOOK: Input: default@ss_part
POSTHOOK: Input: default@ss_part@ss_sold_date_sk=2452617
#### A masked pattern was here ####
10022.63 3952.8 2452617
1765.07 -4648.8 2452617
2.1 -2026.3 2452617
2.99 -11.32 2452617
3423.95 -3164.07 2452617
5362.01 -600.28 2452617
552.96 -1363.84 2452617
565.92 196.48 2452617
7412.83 2071.68 2452617
85.8 25.61 2452617
879.07 -2185.76 2452617
PREHOOK: query: desc formatted ss_part partition(ss_sold_date_sk=2452638)
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@ss_part
POSTHOOK: query: desc formatted ss_part partition(ss_sold_date_sk=2452638)
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@ss_part
# col_name data_type comment
ss_net_paid_inc_tax float
ss_net_profit float
# Partition Information
# col_name data_type comment
ss_sold_date_sk int
# Detailed Partition Information
Partition Value: [2452638]
Database: default
Table: ss_part
#### A masked pattern was here ####
Partition Parameters:
COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"ss_net_paid_inc_tax\":\"true\",\"ss_net_profit\":\"true\"}}
numFiles 1
numRows 13
rawDataSize 186
totalSize 199
#### A masked pattern was here ####
# Storage Information
SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
InputFormat: org.apache.hadoop.mapred.TextInputFormat
OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
Compressed: No
Num Buckets: -1
Bucket Columns: []
Sort Columns: []
Storage Desc Params:
serialization.format 1
PREHOOK: query: select * from ss_part where ss_sold_date_sk=2452638
PREHOOK: type: QUERY
PREHOOK: Input: default@ss_part
PREHOOK: Input: default@ss_part@ss_sold_date_sk=2452638
#### A masked pattern was here ####
POSTHOOK: query: select * from ss_part where ss_sold_date_sk=2452638
POSTHOOK: type: QUERY
POSTHOOK: Input: default@ss_part
POSTHOOK: Input: default@ss_part@ss_sold_date_sk=2452638
#### A masked pattern was here ####
0.15 -241.22 2452638
10171.1 660.48 2452638
1327.08 57.97 2452638
1413.19 178.08 2452638
150.39 -162.12 2452638
1524.33 494.37 2452638
156.67 -4626.56 2452638
181.03 -207.24 2452638
1971.35 -488.25 2452638
267.01 -3266.36 2452638
317.87 -3775.38 2452638
4133.98 -775.72 2452638
4329.49 -4000.51 2452638
PREHOOK: query: explain insert overwrite table ss_part partition (ss_sold_date_sk)
select ss_net_paid_inc_tax,
ss_net_profit,
ss_sold_date_sk
from ss_n0
where ss_sold_date_sk>=2452617 and ss_sold_date_sk<=2452638
group by ss_sold_date_sk,
ss_net_paid_inc_tax,
ss_net_profit
distribute by ss_sold_date_sk
PREHOOK: type: QUERY
PREHOOK: Input: default@ss_n0
PREHOOK: Output: default@ss_part
POSTHOOK: query: explain insert overwrite table ss_part partition (ss_sold_date_sk)
select ss_net_paid_inc_tax,
ss_net_profit,
ss_sold_date_sk
from ss_n0
where ss_sold_date_sk>=2452617 and ss_sold_date_sk<=2452638
group by ss_sold_date_sk,
ss_net_paid_inc_tax,
ss_net_profit
distribute by ss_sold_date_sk
POSTHOOK: type: QUERY
POSTHOOK: Input: default@ss_n0
POSTHOOK: Output: default@ss_part
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-2 depends on stages: Stage-1
Stage-0 depends on stages: Stage-2
Stage-3 depends on stages: Stage-0
STAGE PLANS:
Stage: Stage-1
Tez
#### A masked pattern was here ####
Edges:
Reducer 2 <- Map 1 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
TableScan
alias: ss_n0
filterExpr: ((ss_sold_date_sk >= 2452617) and (ss_sold_date_sk <= 2452638)) (type: boolean)
Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: ((ss_sold_date_sk >= 2452617) and (ss_sold_date_sk <= 2452638)) (type: boolean)
Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
Group By Operator
keys: ss_sold_date_sk (type: int), ss_net_paid_inc_tax (type: float), ss_net_profit (type: float)
minReductionHashAggr: 0.99
mode: hash
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int), _col1 (type: float), _col2 (type: float)
null sort order: zzz
sort order: +++
Map-reduce partition columns: _col0 (type: int)
Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
Execution mode: vectorized, llap
LLAP IO: all inputs
Reducer 2
Execution mode: llap
Reduce Operator Tree:
Group By Operator
keys: KEY._col0 (type: int), KEY._col1 (type: float), KEY._col2 (type: float)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col1 (type: float), _col2 (type: float), _col0 (type: int)
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.ss_part
Select Operator
expressions: _col0 (type: float), _col1 (type: float), _col2 (type: int)
outputColumnNames: ss_net_paid_inc_tax, ss_net_profit, ss_sold_date_sk
Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: min(ss_net_paid_inc_tax), max(ss_net_paid_inc_tax), count(1), count(ss_net_paid_inc_tax), compute_bit_vector(ss_net_paid_inc_tax, 'hll'), min(ss_net_profit), max(ss_net_profit), count(ss_net_profit), compute_bit_vector(ss_net_profit, 'hll')
keys: ss_sold_date_sk (type: int)
mode: complete
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: 'DOUBLE' (type: string), UDFToDouble(_col1) (type: double), UDFToDouble(_col2) (type: double), (_col3 - _col4) (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'DOUBLE' (type: string), UDFToDouble(_col6) (type: double), UDFToDouble(_col7) (type: double), (_col3 - _col8) (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), _col0 (type: int)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-2
Dependency Collection
Stage: Stage-0
Move Operator
tables:
partition:
ss_sold_date_sk
replace: true
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.ss_part
Stage: Stage-3
Stats Work
Basic Stats Work:
Column Stats Desc:
Columns: ss_net_paid_inc_tax, ss_net_profit
Column Types: float, float
Table: default.ss_part
PREHOOK: query: insert overwrite table ss_part partition (ss_sold_date_sk)
select ss_net_paid_inc_tax,
ss_net_profit,
ss_sold_date_sk
from ss_n0
where ss_sold_date_sk>=2452617 and ss_sold_date_sk<=2452638
group by ss_sold_date_sk,
ss_net_paid_inc_tax,
ss_net_profit
distribute by ss_sold_date_sk
PREHOOK: type: QUERY
PREHOOK: Input: default@ss_n0
PREHOOK: Output: default@ss_part
POSTHOOK: query: insert overwrite table ss_part partition (ss_sold_date_sk)
select ss_net_paid_inc_tax,
ss_net_profit,
ss_sold_date_sk
from ss_n0
where ss_sold_date_sk>=2452617 and ss_sold_date_sk<=2452638
group by ss_sold_date_sk,
ss_net_paid_inc_tax,
ss_net_profit
distribute by ss_sold_date_sk
POSTHOOK: type: QUERY
POSTHOOK: Input: default@ss_n0
POSTHOOK: Output: default@ss_part
POSTHOOK: Output: default@ss_part@ss_sold_date_sk=2452617
POSTHOOK: Output: default@ss_part@ss_sold_date_sk=2452638
POSTHOOK: Lineage: ss_part PARTITION(ss_sold_date_sk=2452617).ss_net_paid_inc_tax SIMPLE [(ss_n0)ss_n0.FieldSchema(name:ss_net_paid_inc_tax, type:float, comment:null), ]
POSTHOOK: Lineage: ss_part PARTITION(ss_sold_date_sk=2452617).ss_net_profit SIMPLE [(ss_n0)ss_n0.FieldSchema(name:ss_net_profit, type:float, comment:null), ]
POSTHOOK: Lineage: ss_part PARTITION(ss_sold_date_sk=2452638).ss_net_paid_inc_tax SIMPLE [(ss_n0)ss_n0.FieldSchema(name:ss_net_paid_inc_tax, type:float, comment:null), ]
POSTHOOK: Lineage: ss_part PARTITION(ss_sold_date_sk=2452638).ss_net_profit SIMPLE [(ss_n0)ss_n0.FieldSchema(name:ss_net_profit, type:float, comment:null), ]
PREHOOK: query: desc formatted ss_part partition(ss_sold_date_sk=2452617)
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@ss_part
POSTHOOK: query: desc formatted ss_part partition(ss_sold_date_sk=2452617)
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@ss_part
# col_name data_type comment
ss_net_paid_inc_tax float
ss_net_profit float
# Partition Information
# col_name data_type comment
ss_sold_date_sk int
# Detailed Partition Information
Partition Value: [2452617]
Database: default
Table: ss_part
#### A masked pattern was here ####
Partition Parameters:
COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"ss_net_paid_inc_tax\":\"true\",\"ss_net_profit\":\"true\"}}
numFiles 1
numRows 11
rawDataSize 151
totalSize 162
#### A masked pattern was here ####
# Storage Information
SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
InputFormat: org.apache.hadoop.mapred.TextInputFormat
OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
Compressed: No
Num Buckets: -1
Bucket Columns: []
Sort Columns: []
Storage Desc Params:
serialization.format 1
PREHOOK: query: select * from ss_part where ss_sold_date_sk=2452617
PREHOOK: type: QUERY
PREHOOK: Input: default@ss_part
PREHOOK: Input: default@ss_part@ss_sold_date_sk=2452617
#### A masked pattern was here ####
POSTHOOK: query: select * from ss_part where ss_sold_date_sk=2452617
POSTHOOK: type: QUERY
POSTHOOK: Input: default@ss_part
POSTHOOK: Input: default@ss_part@ss_sold_date_sk=2452617
#### A masked pattern was here ####
10022.63 3952.8 2452617
1765.07 -4648.8 2452617
2.1 -2026.3 2452617
2.99 -11.32 2452617
3423.95 -3164.07 2452617
5362.01 -600.28 2452617
552.96 -1363.84 2452617
565.92 196.48 2452617
7412.83 2071.68 2452617
85.8 25.61 2452617
879.07 -2185.76 2452617
PREHOOK: query: desc formatted ss_part partition(ss_sold_date_sk=2452638)
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@ss_part
POSTHOOK: query: desc formatted ss_part partition(ss_sold_date_sk=2452638)
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@ss_part
# col_name data_type comment
ss_net_paid_inc_tax float
ss_net_profit float
# Partition Information
# col_name data_type comment
ss_sold_date_sk int
# Detailed Partition Information
Partition Value: [2452638]
Database: default
Table: ss_part
#### A masked pattern was here ####
Partition Parameters:
COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"ss_net_paid_inc_tax\":\"true\",\"ss_net_profit\":\"true\"}}
numFiles 1
numRows 13
rawDataSize 186
totalSize 199
#### A masked pattern was here ####
# Storage Information
SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
InputFormat: org.apache.hadoop.mapred.TextInputFormat
OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
Compressed: No
Num Buckets: -1
Bucket Columns: []
Sort Columns: []
Storage Desc Params:
serialization.format 1
PREHOOK: query: select * from ss_part where ss_sold_date_sk=2452638
PREHOOK: type: QUERY
PREHOOK: Input: default@ss_part
PREHOOK: Input: default@ss_part@ss_sold_date_sk=2452638
#### A masked pattern was here ####
POSTHOOK: query: select * from ss_part where ss_sold_date_sk=2452638
POSTHOOK: type: QUERY
POSTHOOK: Input: default@ss_part
POSTHOOK: Input: default@ss_part@ss_sold_date_sk=2452638
#### A masked pattern was here ####
0.15 -241.22 2452638
10171.1 660.48 2452638
1327.08 57.97 2452638
1413.19 178.08 2452638
150.39 -162.12 2452638
1524.33 494.37 2452638
156.67 -4626.56 2452638
181.03 -207.24 2452638
1971.35 -488.25 2452638
267.01 -3266.36 2452638
317.87 -3775.38 2452638
4133.98 -775.72 2452638
4329.49 -4000.51 2452638
PREHOOK: query: explain insert overwrite table ss_part partition (ss_sold_date_sk)
select ss_net_paid_inc_tax,
ss_net_profit,
ss_sold_date_sk
from ss_n0
where ss_sold_date_sk>=2452617 and ss_sold_date_sk<=2452638
distribute by ss_sold_date_sk
PREHOOK: type: QUERY
PREHOOK: Input: default@ss_n0
PREHOOK: Output: default@ss_part
POSTHOOK: query: explain insert overwrite table ss_part partition (ss_sold_date_sk)
select ss_net_paid_inc_tax,
ss_net_profit,
ss_sold_date_sk
from ss_n0
where ss_sold_date_sk>=2452617 and ss_sold_date_sk<=2452638
distribute by ss_sold_date_sk
POSTHOOK: type: QUERY
POSTHOOK: Input: default@ss_n0
POSTHOOK: Output: default@ss_part
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-2 depends on stages: Stage-1
Stage-0 depends on stages: Stage-2
Stage-3 depends on stages: Stage-0
STAGE PLANS:
Stage: Stage-1
Tez
#### A masked pattern was here ####
Edges:
Reducer 2 <- Map 1 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
TableScan
alias: ss_n0
filterExpr: ((ss_sold_date_sk >= 2452617) and (ss_sold_date_sk <= 2452638)) (type: boolean)
Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: ((ss_sold_date_sk >= 2452617) and (ss_sold_date_sk <= 2452638)) (type: boolean)
Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ss_net_paid_inc_tax (type: float), ss_net_profit (type: float), ss_sold_date_sk (type: int)
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col2 (type: int)
null sort order: z
sort order: +
Map-reduce partition columns: _col2 (type: int)
Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
value expressions: _col0 (type: float), _col1 (type: float), _col2 (type: int)
Execution mode: vectorized, llap
LLAP IO: all inputs
Reducer 2
Execution mode: llap
Reduce Operator Tree:
Select Operator
expressions: VALUE._col0 (type: float), VALUE._col1 (type: float), VALUE._col2 (type: int)
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.ss_part
Select Operator
expressions: _col0 (type: float), _col1 (type: float), _col2 (type: int)
outputColumnNames: ss_net_paid_inc_tax, ss_net_profit, ss_sold_date_sk
Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: min(ss_net_paid_inc_tax), max(ss_net_paid_inc_tax), count(1), count(ss_net_paid_inc_tax), compute_bit_vector(ss_net_paid_inc_tax, 'hll'), min(ss_net_profit), max(ss_net_profit), count(ss_net_profit), compute_bit_vector(ss_net_profit, 'hll')
keys: ss_sold_date_sk (type: int)
mode: complete
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: 'DOUBLE' (type: string), UDFToDouble(_col1) (type: double), UDFToDouble(_col2) (type: double), (_col3 - _col4) (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'DOUBLE' (type: string), UDFToDouble(_col6) (type: double), UDFToDouble(_col7) (type: double), (_col3 - _col8) (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), _col0 (type: int)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-2
Dependency Collection
Stage: Stage-0
Move Operator
tables:
partition:
ss_sold_date_sk
replace: true
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.ss_part
Stage: Stage-3
Stats Work
Basic Stats Work:
Column Stats Desc:
Columns: ss_net_paid_inc_tax, ss_net_profit
Column Types: float, float
Table: default.ss_part
PREHOOK: query: insert overwrite table ss_part partition (ss_sold_date_sk)
select ss_net_paid_inc_tax,
ss_net_profit,
ss_sold_date_sk
from ss_n0
where ss_sold_date_sk>=2452617 and ss_sold_date_sk<=2452638
distribute by ss_sold_date_sk
PREHOOK: type: QUERY
PREHOOK: Input: default@ss_n0
PREHOOK: Output: default@ss_part
POSTHOOK: query: insert overwrite table ss_part partition (ss_sold_date_sk)
select ss_net_paid_inc_tax,
ss_net_profit,
ss_sold_date_sk
from ss_n0
where ss_sold_date_sk>=2452617 and ss_sold_date_sk<=2452638
distribute by ss_sold_date_sk
POSTHOOK: type: QUERY
POSTHOOK: Input: default@ss_n0
POSTHOOK: Output: default@ss_part
POSTHOOK: Output: default@ss_part@ss_sold_date_sk=2452617
POSTHOOK: Output: default@ss_part@ss_sold_date_sk=2452638
POSTHOOK: Lineage: ss_part PARTITION(ss_sold_date_sk=2452617).ss_net_paid_inc_tax SIMPLE [(ss_n0)ss_n0.FieldSchema(name:ss_net_paid_inc_tax, type:float, comment:null), ]
POSTHOOK: Lineage: ss_part PARTITION(ss_sold_date_sk=2452617).ss_net_profit SIMPLE [(ss_n0)ss_n0.FieldSchema(name:ss_net_profit, type:float, comment:null), ]
POSTHOOK: Lineage: ss_part PARTITION(ss_sold_date_sk=2452638).ss_net_paid_inc_tax SIMPLE [(ss_n0)ss_n0.FieldSchema(name:ss_net_paid_inc_tax, type:float, comment:null), ]
POSTHOOK: Lineage: ss_part PARTITION(ss_sold_date_sk=2452638).ss_net_profit SIMPLE [(ss_n0)ss_n0.FieldSchema(name:ss_net_profit, type:float, comment:null), ]
PREHOOK: query: desc formatted ss_part partition(ss_sold_date_sk=2452617)
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@ss_part
POSTHOOK: query: desc formatted ss_part partition(ss_sold_date_sk=2452617)
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@ss_part
# col_name data_type comment
ss_net_paid_inc_tax float
ss_net_profit float
# Partition Information
# col_name data_type comment
ss_sold_date_sk int
# Detailed Partition Information
Partition Value: [2452617]
Database: default
Table: ss_part
#### A masked pattern was here ####
Partition Parameters:
COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"ss_net_paid_inc_tax\":\"true\",\"ss_net_profit\":\"true\"}}
numFiles 1
numRows 11
rawDataSize 151
totalSize 162
#### A masked pattern was here ####
# Storage Information
SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
InputFormat: org.apache.hadoop.mapred.TextInputFormat
OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
Compressed: No
Num Buckets: -1
Bucket Columns: []
Sort Columns: []
Storage Desc Params:
serialization.format 1
PREHOOK: query: select * from ss_part where ss_sold_date_sk=2452617
PREHOOK: type: QUERY
PREHOOK: Input: default@ss_part
PREHOOK: Input: default@ss_part@ss_sold_date_sk=2452617
#### A masked pattern was here ####
POSTHOOK: query: select * from ss_part where ss_sold_date_sk=2452617
POSTHOOK: type: QUERY
POSTHOOK: Input: default@ss_part
POSTHOOK: Input: default@ss_part@ss_sold_date_sk=2452617
#### A masked pattern was here ####
10022.63 3952.8 2452617
1765.07 -4648.8 2452617
2.1 -2026.3 2452617
2.99 -11.32 2452617
3423.95 -3164.07 2452617
5362.01 -600.28 2452617
552.96 -1363.84 2452617
565.92 196.48 2452617
7412.83 2071.68 2452617
85.8 25.61 2452617
879.07 -2185.76 2452617
PREHOOK: query: desc formatted ss_part partition(ss_sold_date_sk=2452638)
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@ss_part
POSTHOOK: query: desc formatted ss_part partition(ss_sold_date_sk=2452638)
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@ss_part
# col_name data_type comment
ss_net_paid_inc_tax float
ss_net_profit float
# Partition Information
# col_name data_type comment
ss_sold_date_sk int
# Detailed Partition Information
Partition Value: [2452638]
Database: default
Table: ss_part
#### A masked pattern was here ####
Partition Parameters:
COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"ss_net_paid_inc_tax\":\"true\",\"ss_net_profit\":\"true\"}}
numFiles 1
numRows 13
rawDataSize 186
totalSize 199
#### A masked pattern was here ####
# Storage Information
SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
InputFormat: org.apache.hadoop.mapred.TextInputFormat
OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
Compressed: No
Num Buckets: -1
Bucket Columns: []
Sort Columns: []
Storage Desc Params:
serialization.format 1
PREHOOK: query: select * from ss_part where ss_sold_date_sk=2452638
PREHOOK: type: QUERY
PREHOOK: Input: default@ss_part
PREHOOK: Input: default@ss_part@ss_sold_date_sk=2452638
#### A masked pattern was here ####
POSTHOOK: query: select * from ss_part where ss_sold_date_sk=2452638
POSTHOOK: type: QUERY
POSTHOOK: Input: default@ss_part
POSTHOOK: Input: default@ss_part@ss_sold_date_sk=2452638
#### A masked pattern was here ####
0.15 -241.22 2452638
10171.1 660.48 2452638
1327.08 57.97 2452638
1413.19 178.08 2452638
150.39 -162.12 2452638
1524.33 494.37 2452638
156.67 -4626.56 2452638
181.03 -207.24 2452638
1971.35 -488.25 2452638
267.01 -3266.36 2452638
317.87 -3775.38 2452638
4133.98 -775.72 2452638
4329.49 -4000.51 2452638
PREHOOK: query: create table ss_orc (
ss_sold_date_sk int,
ss_net_paid_inc_tax float,
ss_net_profit float) stored as orc
PREHOOK: type: CREATETABLE
PREHOOK: Output: database:default
PREHOOK: Output: default@ss_orc
POSTHOOK: query: create table ss_orc (
ss_sold_date_sk int,
ss_net_paid_inc_tax float,
ss_net_profit float) stored as orc
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: database:default
POSTHOOK: Output: default@ss_orc
PREHOOK: query: create table ss_part_orc (
ss_net_paid_inc_tax float,
ss_net_profit float)
partitioned by (ss_sold_date_sk int) stored as orc
PREHOOK: type: CREATETABLE
PREHOOK: Output: database:default
PREHOOK: Output: default@ss_part_orc
POSTHOOK: query: create table ss_part_orc (
ss_net_paid_inc_tax float,
ss_net_profit float)
partitioned by (ss_sold_date_sk int) stored as orc
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: database:default
POSTHOOK: Output: default@ss_part_orc
PREHOOK: query: insert overwrite table ss_orc select * from ss_n0
PREHOOK: type: QUERY
PREHOOK: Input: default@ss_n0
PREHOOK: Output: default@ss_orc
POSTHOOK: query: insert overwrite table ss_orc select * from ss_n0
POSTHOOK: type: QUERY
POSTHOOK: Input: default@ss_n0
POSTHOOK: Output: default@ss_orc
POSTHOOK: Lineage: ss_orc.ss_net_paid_inc_tax SIMPLE [(ss_n0)ss_n0.FieldSchema(name:ss_net_paid_inc_tax, type:float, comment:null), ]
POSTHOOK: Lineage: ss_orc.ss_net_profit SIMPLE [(ss_n0)ss_n0.FieldSchema(name:ss_net_profit, type:float, comment:null), ]
POSTHOOK: Lineage: ss_orc.ss_sold_date_sk SIMPLE [(ss_n0)ss_n0.FieldSchema(name:ss_sold_date_sk, type:int, comment:null), ]
PREHOOK: query: drop table ss_n0
PREHOOK: type: DROPTABLE
PREHOOK: Input: default@ss_n0
PREHOOK: Output: default@ss_n0
POSTHOOK: query: drop table ss_n0
POSTHOOK: type: DROPTABLE
POSTHOOK: Input: default@ss_n0
POSTHOOK: Output: default@ss_n0
PREHOOK: query: drop table ss_part
PREHOOK: type: DROPTABLE
PREHOOK: Input: default@ss_part
PREHOOK: Output: default@ss_part
POSTHOOK: query: drop table ss_part
POSTHOOK: type: DROPTABLE
POSTHOOK: Input: default@ss_part
POSTHOOK: Output: default@ss_part
PREHOOK: query: explain insert overwrite table ss_part_orc partition (ss_sold_date_sk)
select ss_net_paid_inc_tax,
ss_net_profit,
ss_sold_date_sk
from ss_orc
where ss_sold_date_sk>=2452617 and ss_sold_date_sk<=2452638
group by ss_sold_date_sk,
ss_net_paid_inc_tax,
ss_net_profit
distribute by ss_sold_date_sk
PREHOOK: type: QUERY
PREHOOK: Input: default@ss_orc
PREHOOK: Output: default@ss_part_orc
POSTHOOK: query: explain insert overwrite table ss_part_orc partition (ss_sold_date_sk)
select ss_net_paid_inc_tax,
ss_net_profit,
ss_sold_date_sk
from ss_orc
where ss_sold_date_sk>=2452617 and ss_sold_date_sk<=2452638
group by ss_sold_date_sk,
ss_net_paid_inc_tax,
ss_net_profit
distribute by ss_sold_date_sk
POSTHOOK: type: QUERY
POSTHOOK: Input: default@ss_orc
POSTHOOK: Output: default@ss_part_orc
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-2 depends on stages: Stage-1
Stage-0 depends on stages: Stage-2
Stage-3 depends on stages: Stage-0
STAGE PLANS:
Stage: Stage-1
Tez
#### A masked pattern was here ####
Edges:
Reducer 2 <- Map 1 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
TableScan
alias: ss_orc
filterExpr: ((ss_sold_date_sk >= 2452617) and (ss_sold_date_sk <= 2452638)) (type: boolean)
Statistics: Num rows: 24 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: ((ss_sold_date_sk >= 2452617) and (ss_sold_date_sk <= 2452638)) (type: boolean)
Statistics: Num rows: 24 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
keys: ss_sold_date_sk (type: int), ss_net_paid_inc_tax (type: float), ss_net_profit (type: float)
minReductionHashAggr: 0.0
mode: hash
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 12 Data size: 144 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int), _col1 (type: float), _col2 (type: float)
null sort order: zzz
sort order: +++
Map-reduce partition columns: _col0 (type: int)
Statistics: Num rows: 12 Data size: 144 Basic stats: COMPLETE Column stats: COMPLETE
Execution mode: vectorized, llap
LLAP IO: all inputs
Reducer 2
Execution mode: llap
Reduce Operator Tree:
Group By Operator
keys: KEY._col0 (type: int), KEY._col1 (type: float), KEY._col2 (type: float)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 12 Data size: 144 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col1 (type: float), _col2 (type: float), _col0 (type: int)
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 12 Data size: 144 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
Statistics: Num rows: 12 Data size: 144 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.ss_part_orc
Select Operator
expressions: _col0 (type: float), _col1 (type: float), _col2 (type: int)
outputColumnNames: ss_net_paid_inc_tax, ss_net_profit, ss_sold_date_sk
Statistics: Num rows: 12 Data size: 144 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: min(ss_net_paid_inc_tax), max(ss_net_paid_inc_tax), count(1), count(ss_net_paid_inc_tax), compute_bit_vector(ss_net_paid_inc_tax, 'hll'), min(ss_net_profit), max(ss_net_profit), count(ss_net_profit), compute_bit_vector(ss_net_profit, 'hll')
keys: ss_sold_date_sk (type: int)
mode: complete
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
Statistics: Num rows: 2 Data size: 664 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: 'DOUBLE' (type: string), UDFToDouble(_col1) (type: double), UDFToDouble(_col2) (type: double), (_col3 - _col4) (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'DOUBLE' (type: string), UDFToDouble(_col6) (type: double), UDFToDouble(_col7) (type: double), (_col3 - _col8) (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), _col0 (type: int)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
Statistics: Num rows: 2 Data size: 1072 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
Statistics: Num rows: 2 Data size: 1072 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-2
Dependency Collection
Stage: Stage-0
Move Operator
tables:
partition:
ss_sold_date_sk
replace: true
table:
input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.ss_part_orc
Stage: Stage-3
Stats Work
Basic Stats Work:
Column Stats Desc:
Columns: ss_net_paid_inc_tax, ss_net_profit
Column Types: float, float
Table: default.ss_part_orc
PREHOOK: query: insert overwrite table ss_part_orc partition (ss_sold_date_sk)
select ss_net_paid_inc_tax,
ss_net_profit,
ss_sold_date_sk
from ss_orc
where ss_sold_date_sk>=2452617 and ss_sold_date_sk<=2452638
group by ss_sold_date_sk,
ss_net_paid_inc_tax,
ss_net_profit
distribute by ss_sold_date_sk
PREHOOK: type: QUERY
PREHOOK: Input: default@ss_orc
PREHOOK: Output: default@ss_part_orc
POSTHOOK: query: insert overwrite table ss_part_orc partition (ss_sold_date_sk)
select ss_net_paid_inc_tax,
ss_net_profit,
ss_sold_date_sk
from ss_orc
where ss_sold_date_sk>=2452617 and ss_sold_date_sk<=2452638
group by ss_sold_date_sk,
ss_net_paid_inc_tax,
ss_net_profit
distribute by ss_sold_date_sk
POSTHOOK: type: QUERY
POSTHOOK: Input: default@ss_orc
POSTHOOK: Output: default@ss_part_orc
POSTHOOK: Output: default@ss_part_orc@ss_sold_date_sk=2452617
POSTHOOK: Output: default@ss_part_orc@ss_sold_date_sk=2452638
POSTHOOK: Lineage: ss_part_orc PARTITION(ss_sold_date_sk=2452617).ss_net_paid_inc_tax SIMPLE [(ss_orc)ss_orc.FieldSchema(name:ss_net_paid_inc_tax, type:float, comment:null), ]
POSTHOOK: Lineage: ss_part_orc PARTITION(ss_sold_date_sk=2452617).ss_net_profit SIMPLE [(ss_orc)ss_orc.FieldSchema(name:ss_net_profit, type:float, comment:null), ]
POSTHOOK: Lineage: ss_part_orc PARTITION(ss_sold_date_sk=2452638).ss_net_paid_inc_tax SIMPLE [(ss_orc)ss_orc.FieldSchema(name:ss_net_paid_inc_tax, type:float, comment:null), ]
POSTHOOK: Lineage: ss_part_orc PARTITION(ss_sold_date_sk=2452638).ss_net_profit SIMPLE [(ss_orc)ss_orc.FieldSchema(name:ss_net_profit, type:float, comment:null), ]
PREHOOK: query: desc formatted ss_part_orc partition(ss_sold_date_sk=2452617)
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@ss_part_orc
POSTHOOK: query: desc formatted ss_part_orc partition(ss_sold_date_sk=2452617)
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@ss_part_orc
# col_name data_type comment
ss_net_paid_inc_tax float
ss_net_profit float
# Partition Information
# col_name data_type comment
ss_sold_date_sk int
# Detailed Partition Information
Partition Value: [2452617]
Database: default
Table: ss_part_orc
#### A masked pattern was here ####
Partition Parameters:
COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"ss_net_paid_inc_tax\":\"true\",\"ss_net_profit\":\"true\"}}
numFiles 1
numRows 11
rawDataSize 88
totalSize 466
#### A masked pattern was here ####
# Storage Information
SerDe Library: org.apache.hadoop.hive.ql.io.orc.OrcSerde
InputFormat: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
OutputFormat: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
Compressed: No
Num Buckets: -1
Bucket Columns: []
Sort Columns: []
Storage Desc Params:
serialization.format 1
PREHOOK: query: select * from ss_part_orc where ss_sold_date_sk=2452617
PREHOOK: type: QUERY
PREHOOK: Input: default@ss_part_orc
PREHOOK: Input: default@ss_part_orc@ss_sold_date_sk=2452617
#### A masked pattern was here ####
POSTHOOK: query: select * from ss_part_orc where ss_sold_date_sk=2452617
POSTHOOK: type: QUERY
POSTHOOK: Input: default@ss_part_orc
POSTHOOK: Input: default@ss_part_orc@ss_sold_date_sk=2452617
#### A masked pattern was here ####
10022.63 3952.8 2452617
1765.07 -4648.8 2452617
2.1 -2026.3 2452617
2.99 -11.32 2452617
3423.95 -3164.07 2452617
5362.01 -600.28 2452617
552.96 -1363.84 2452617
565.92 196.48 2452617
7412.83 2071.68 2452617
85.8 25.61 2452617
879.07 -2185.76 2452617
PREHOOK: query: desc formatted ss_part_orc partition(ss_sold_date_sk=2452638)
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@ss_part_orc
POSTHOOK: query: desc formatted ss_part_orc partition(ss_sold_date_sk=2452638)
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@ss_part_orc
# col_name data_type comment
ss_net_paid_inc_tax float
ss_net_profit float
# Partition Information
# col_name data_type comment
ss_sold_date_sk int
# Detailed Partition Information
Partition Value: [2452638]
Database: default
Table: ss_part_orc
#### A masked pattern was here ####
Partition Parameters:
COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"ss_net_paid_inc_tax\":\"true\",\"ss_net_profit\":\"true\"}}
numFiles 1
numRows 13
rawDataSize 104
totalSize 489
#### A masked pattern was here ####
# Storage Information
SerDe Library: org.apache.hadoop.hive.ql.io.orc.OrcSerde
InputFormat: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
OutputFormat: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
Compressed: No
Num Buckets: -1
Bucket Columns: []
Sort Columns: []
Storage Desc Params:
serialization.format 1
PREHOOK: query: select * from ss_part_orc where ss_sold_date_sk=2452638
PREHOOK: type: QUERY
PREHOOK: Input: default@ss_part_orc
PREHOOK: Input: default@ss_part_orc@ss_sold_date_sk=2452638
#### A masked pattern was here ####
POSTHOOK: query: select * from ss_part_orc where ss_sold_date_sk=2452638
POSTHOOK: type: QUERY
POSTHOOK: Input: default@ss_part_orc
POSTHOOK: Input: default@ss_part_orc@ss_sold_date_sk=2452638
#### A masked pattern was here ####
0.15 -241.22 2452638
10171.1 660.48 2452638
1327.08 57.97 2452638
1413.19 178.08 2452638
150.39 -162.12 2452638
1524.33 494.37 2452638
156.67 -4626.56 2452638
181.03 -207.24 2452638
1971.35 -488.25 2452638
267.01 -3266.36 2452638
317.87 -3775.38 2452638
4133.98 -775.72 2452638
4329.49 -4000.51 2452638
PREHOOK: query: explain insert overwrite table ss_part_orc partition (ss_sold_date_sk)
select ss_net_paid_inc_tax,
ss_net_profit,
ss_sold_date_sk
from ss_orc
where ss_sold_date_sk>=2452617 and ss_sold_date_sk<=2452638
distribute by ss_sold_date_sk
PREHOOK: type: QUERY
PREHOOK: Input: default@ss_orc
PREHOOK: Output: default@ss_part_orc
POSTHOOK: query: explain insert overwrite table ss_part_orc partition (ss_sold_date_sk)
select ss_net_paid_inc_tax,
ss_net_profit,
ss_sold_date_sk
from ss_orc
where ss_sold_date_sk>=2452617 and ss_sold_date_sk<=2452638
distribute by ss_sold_date_sk
POSTHOOK: type: QUERY
POSTHOOK: Input: default@ss_orc
POSTHOOK: Output: default@ss_part_orc
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-2 depends on stages: Stage-1
Stage-0 depends on stages: Stage-2
Stage-3 depends on stages: Stage-0
STAGE PLANS:
Stage: Stage-1
Tez
#### A masked pattern was here ####
Edges:
Reducer 2 <- Map 1 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
TableScan
alias: ss_orc
filterExpr: ((ss_sold_date_sk >= 2452617) and (ss_sold_date_sk <= 2452638)) (type: boolean)
Statistics: Num rows: 24 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: ((ss_sold_date_sk >= 2452617) and (ss_sold_date_sk <= 2452638)) (type: boolean)
Statistics: Num rows: 24 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: ss_net_paid_inc_tax (type: float), ss_net_profit (type: float), ss_sold_date_sk (type: int)
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 24 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col2 (type: int)
null sort order: z
sort order: +
Map-reduce partition columns: _col2 (type: int)
Statistics: Num rows: 24 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col0 (type: float), _col1 (type: float), _col2 (type: int)
Execution mode: vectorized, llap
LLAP IO: all inputs
Reducer 2
Execution mode: llap
Reduce Operator Tree:
Select Operator
expressions: VALUE._col0 (type: float), VALUE._col1 (type: float), VALUE._col2 (type: int)
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 24 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
Statistics: Num rows: 24 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.ss_part_orc
Select Operator
expressions: _col0 (type: float), _col1 (type: float), _col2 (type: int)
outputColumnNames: ss_net_paid_inc_tax, ss_net_profit, ss_sold_date_sk
Statistics: Num rows: 24 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: min(ss_net_paid_inc_tax), max(ss_net_paid_inc_tax), count(1), count(ss_net_paid_inc_tax), compute_bit_vector(ss_net_paid_inc_tax, 'hll'), min(ss_net_profit), max(ss_net_profit), count(ss_net_profit), compute_bit_vector(ss_net_profit, 'hll')
keys: ss_sold_date_sk (type: int)
mode: complete
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
Statistics: Num rows: 2 Data size: 664 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: 'DOUBLE' (type: string), UDFToDouble(_col1) (type: double), UDFToDouble(_col2) (type: double), (_col3 - _col4) (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'DOUBLE' (type: string), UDFToDouble(_col6) (type: double), UDFToDouble(_col7) (type: double), (_col3 - _col8) (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), _col0 (type: int)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
Statistics: Num rows: 2 Data size: 1072 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
Statistics: Num rows: 2 Data size: 1072 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-2
Dependency Collection
Stage: Stage-0
Move Operator
tables:
partition:
ss_sold_date_sk
replace: true
table:
input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.ss_part_orc
Stage: Stage-3
Stats Work
Basic Stats Work:
Column Stats Desc:
Columns: ss_net_paid_inc_tax, ss_net_profit
Column Types: float, float
Table: default.ss_part_orc
PREHOOK: query: insert overwrite table ss_part_orc partition (ss_sold_date_sk)
select ss_net_paid_inc_tax,
ss_net_profit,
ss_sold_date_sk
from ss_orc
where ss_sold_date_sk>=2452617 and ss_sold_date_sk<=2452638
distribute by ss_sold_date_sk
PREHOOK: type: QUERY
PREHOOK: Input: default@ss_orc
PREHOOK: Output: default@ss_part_orc
POSTHOOK: query: insert overwrite table ss_part_orc partition (ss_sold_date_sk)
select ss_net_paid_inc_tax,
ss_net_profit,
ss_sold_date_sk
from ss_orc
where ss_sold_date_sk>=2452617 and ss_sold_date_sk<=2452638
distribute by ss_sold_date_sk
POSTHOOK: type: QUERY
POSTHOOK: Input: default@ss_orc
POSTHOOK: Output: default@ss_part_orc
POSTHOOK: Output: default@ss_part_orc@ss_sold_date_sk=2452617
POSTHOOK: Output: default@ss_part_orc@ss_sold_date_sk=2452638
POSTHOOK: Lineage: ss_part_orc PARTITION(ss_sold_date_sk=2452617).ss_net_paid_inc_tax SIMPLE [(ss_orc)ss_orc.FieldSchema(name:ss_net_paid_inc_tax, type:float, comment:null), ]
POSTHOOK: Lineage: ss_part_orc PARTITION(ss_sold_date_sk=2452617).ss_net_profit SIMPLE [(ss_orc)ss_orc.FieldSchema(name:ss_net_profit, type:float, comment:null), ]
POSTHOOK: Lineage: ss_part_orc PARTITION(ss_sold_date_sk=2452638).ss_net_paid_inc_tax SIMPLE [(ss_orc)ss_orc.FieldSchema(name:ss_net_paid_inc_tax, type:float, comment:null), ]
POSTHOOK: Lineage: ss_part_orc PARTITION(ss_sold_date_sk=2452638).ss_net_profit SIMPLE [(ss_orc)ss_orc.FieldSchema(name:ss_net_profit, type:float, comment:null), ]
PREHOOK: query: desc formatted ss_part_orc partition(ss_sold_date_sk=2452617)
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@ss_part_orc
POSTHOOK: query: desc formatted ss_part_orc partition(ss_sold_date_sk=2452617)
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@ss_part_orc
# col_name data_type comment
ss_net_paid_inc_tax float
ss_net_profit float
# Partition Information
# col_name data_type comment
ss_sold_date_sk int
# Detailed Partition Information
Partition Value: [2452617]
Database: default
Table: ss_part_orc
#### A masked pattern was here ####
Partition Parameters:
COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"ss_net_paid_inc_tax\":\"true\",\"ss_net_profit\":\"true\"}}
numFiles 1
numRows 11
rawDataSize 88
totalSize 466
#### A masked pattern was here ####
# Storage Information
SerDe Library: org.apache.hadoop.hive.ql.io.orc.OrcSerde
InputFormat: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
OutputFormat: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
Compressed: No
Num Buckets: -1
Bucket Columns: []
Sort Columns: []
Storage Desc Params:
serialization.format 1
PREHOOK: query: select * from ss_part_orc where ss_sold_date_sk=2452617
PREHOOK: type: QUERY
PREHOOK: Input: default@ss_part_orc
PREHOOK: Input: default@ss_part_orc@ss_sold_date_sk=2452617
#### A masked pattern was here ####
POSTHOOK: query: select * from ss_part_orc where ss_sold_date_sk=2452617
POSTHOOK: type: QUERY
POSTHOOK: Input: default@ss_part_orc
POSTHOOK: Input: default@ss_part_orc@ss_sold_date_sk=2452617
#### A masked pattern was here ####
10022.63 3952.8 2452617
1765.07 -4648.8 2452617
2.1 -2026.3 2452617
2.99 -11.32 2452617
3423.95 -3164.07 2452617
5362.01 -600.28 2452617
552.96 -1363.84 2452617
565.92 196.48 2452617
7412.83 2071.68 2452617
85.8 25.61 2452617
879.07 -2185.76 2452617
PREHOOK: query: desc formatted ss_part_orc partition(ss_sold_date_sk=2452638)
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@ss_part_orc
POSTHOOK: query: desc formatted ss_part_orc partition(ss_sold_date_sk=2452638)
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@ss_part_orc
# col_name data_type comment
ss_net_paid_inc_tax float
ss_net_profit float
# Partition Information
# col_name data_type comment
ss_sold_date_sk int
# Detailed Partition Information
Partition Value: [2452638]
Database: default
Table: ss_part_orc
#### A masked pattern was here ####
Partition Parameters:
COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"ss_net_paid_inc_tax\":\"true\",\"ss_net_profit\":\"true\"}}
numFiles 1
numRows 13
rawDataSize 104
totalSize 489
#### A masked pattern was here ####
# Storage Information
SerDe Library: org.apache.hadoop.hive.ql.io.orc.OrcSerde
InputFormat: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
OutputFormat: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
Compressed: No
Num Buckets: -1
Bucket Columns: []
Sort Columns: []
Storage Desc Params:
serialization.format 1
PREHOOK: query: select * from ss_part_orc where ss_sold_date_sk=2452638
PREHOOK: type: QUERY
PREHOOK: Input: default@ss_part_orc
PREHOOK: Input: default@ss_part_orc@ss_sold_date_sk=2452638
#### A masked pattern was here ####
POSTHOOK: query: select * from ss_part_orc where ss_sold_date_sk=2452638
POSTHOOK: type: QUERY
POSTHOOK: Input: default@ss_part_orc
POSTHOOK: Input: default@ss_part_orc@ss_sold_date_sk=2452638
#### A masked pattern was here ####
0.15 -241.22 2452638
10171.1 660.48 2452638
1327.08 57.97 2452638
1413.19 178.08 2452638
150.39 -162.12 2452638
1524.33 494.37 2452638
156.67 -4626.56 2452638
181.03 -207.24 2452638
1971.35 -488.25 2452638
267.01 -3266.36 2452638
317.87 -3775.38 2452638
4133.98 -775.72 2452638
4329.49 -4000.51 2452638
PREHOOK: query: drop table ss_orc
PREHOOK: type: DROPTABLE
PREHOOK: Input: default@ss_orc
PREHOOK: Output: default@ss_orc
POSTHOOK: query: drop table ss_orc
POSTHOOK: type: DROPTABLE
POSTHOOK: Input: default@ss_orc
POSTHOOK: Output: default@ss_orc
PREHOOK: query: drop table ss_part_orc
PREHOOK: type: DROPTABLE
PREHOOK: Input: default@ss_part_orc
PREHOOK: Output: default@ss_part_orc
POSTHOOK: query: drop table ss_part_orc
POSTHOOK: type: DROPTABLE
POSTHOOK: Input: default@ss_part_orc
POSTHOOK: Output: default@ss_part_orc
PREHOOK: query: drop table if exists hive13_dp1
PREHOOK: type: DROPTABLE
POSTHOOK: query: drop table if exists hive13_dp1
POSTHOOK: type: DROPTABLE
PREHOOK: query: create table if not exists hive13_dp1 (
k1 int,
k2 int
)
PARTITIONED BY(`day` string)
STORED AS ORC
PREHOOK: type: CREATETABLE
PREHOOK: Output: database:default
PREHOOK: Output: default@hive13_dp1
POSTHOOK: query: create table if not exists hive13_dp1 (
k1 int,
k2 int
)
PARTITIONED BY(`day` string)
STORED AS ORC
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: database:default
POSTHOOK: Output: default@hive13_dp1
PREHOOK: query: explain insert overwrite table `hive13_dp1` partition(`day`)
select
key k1,
count(value) k2,
"day" `day`
from src
group by "day", key
PREHOOK: type: QUERY
PREHOOK: Input: default@src
PREHOOK: Output: default@hive13_dp1
POSTHOOK: query: explain insert overwrite table `hive13_dp1` partition(`day`)
select
key k1,
count(value) k2,
"day" `day`
from src
group by "day", key
POSTHOOK: type: QUERY
POSTHOOK: Input: default@src
POSTHOOK: Output: default@hive13_dp1
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-2 depends on stages: Stage-1
Stage-0 depends on stages: Stage-2
Stage-3 depends on stages: Stage-0
STAGE PLANS:
Stage: Stage-1
Tez
#### A masked pattern was here ####
Edges:
Reducer 2 <- Map 1 (SIMPLE_EDGE)
Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
TableScan
alias: src
Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: string), value (type: string)
outputColumnNames: key, value
Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: count(value)
keys: key (type: string)
minReductionHashAggr: 0.5
mode: hash
outputColumnNames: _col0, _col1
Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string)
null sort order: z
sort order: +
Map-reduce partition columns: _col0 (type: string)
Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: bigint)
Execution mode: vectorized, llap
LLAP IO: all inputs
Reducer 2
Execution mode: llap
Reduce Operator Tree:
Group By Operator
aggregations: count(VALUE._col0)
keys: KEY._col0 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1
Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: UDFToInteger(_col0) (type: int), UDFToInteger(_col1) (type: int), 'day' (type: string)
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.hive13_dp1
Select Operator
expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string)
outputColumnNames: k1, k2, day
Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: min(k1), max(k1), count(1), count(k1), compute_bit_vector(k1, 'hll'), min(k2), max(k2), count(k2), compute_bit_vector(k2, 'hll')
keys: day (type: string)
minReductionHashAggr: 0.99
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
Statistics: Num rows: 1 Data size: 415 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string)
null sort order: z
sort order: +
Map-reduce partition columns: _col0 (type: string)
Statistics: Num rows: 1 Data size: 415 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: int), _col8 (type: bigint), _col9 (type: binary)
Reducer 3
Execution mode: llap
Reduce Operator Tree:
Group By Operator
aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8)
keys: KEY._col0 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
Statistics: Num rows: 1 Data size: 415 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: 'LONG' (type: string), UDFToLong(_col1) (type: bigint), UDFToLong(_col2) (type: bigint), (_col3 - _col4) (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'LONG' (type: string), UDFToLong(_col6) (type: bigint), UDFToLong(_col7) (type: bigint), (_col3 - _col8) (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), _col0 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
Statistics: Num rows: 1 Data size: 615 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
Statistics: Num rows: 1 Data size: 615 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-2
Dependency Collection
Stage: Stage-0
Move Operator
tables:
partition:
day
replace: true
table:
input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.hive13_dp1
Stage: Stage-3
Stats Work
Basic Stats Work:
Column Stats Desc:
Columns: k1, k2
Column Types: int, int
Table: default.hive13_dp1
PREHOOK: query: insert overwrite table `hive13_dp1` partition(`day`)
select
key k1,
count(value) k2,
"day" `day`
from src
group by "day", key
PREHOOK: type: QUERY
PREHOOK: Input: default@src
PREHOOK: Output: default@hive13_dp1
POSTHOOK: query: insert overwrite table `hive13_dp1` partition(`day`)
select
key k1,
count(value) k2,
"day" `day`
from src
group by "day", key
POSTHOOK: type: QUERY
POSTHOOK: Input: default@src
POSTHOOK: Output: default@hive13_dp1
POSTHOOK: Output: default@hive13_dp1@day=day
POSTHOOK: Lineage: hive13_dp1 PARTITION(day=day).k1 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: hive13_dp1 PARTITION(day=day).k2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
PREHOOK: query: select * from hive13_dp1 order by k1, k2 limit 5
PREHOOK: type: QUERY
PREHOOK: Input: default@hive13_dp1
PREHOOK: Input: default@hive13_dp1@day=day
#### A masked pattern was here ####
POSTHOOK: query: select * from hive13_dp1 order by k1, k2 limit 5
POSTHOOK: type: QUERY
POSTHOOK: Input: default@hive13_dp1
POSTHOOK: Input: default@hive13_dp1@day=day
#### A masked pattern was here ####
0 3 day
2 1 day
4 1 day
5 3 day
8 1 day
PREHOOK: query: explain insert overwrite table `hive13_dp1` partition(`day`)
select
key k1,
count(value) k2,
"day" `day`
from src
group by "day", key
PREHOOK: type: QUERY
PREHOOK: Input: default@src
PREHOOK: Output: default@hive13_dp1
POSTHOOK: query: explain insert overwrite table `hive13_dp1` partition(`day`)
select
key k1,
count(value) k2,
"day" `day`
from src
group by "day", key
POSTHOOK: type: QUERY
POSTHOOK: Input: default@src
POSTHOOK: Output: default@hive13_dp1
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-2 depends on stages: Stage-1
Stage-0 depends on stages: Stage-2
Stage-3 depends on stages: Stage-0
STAGE PLANS:
Stage: Stage-1
Tez
#### A masked pattern was here ####
Edges:
Reducer 2 <- Map 1 (SIMPLE_EDGE)
Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
TableScan
alias: src
Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: string), value (type: string)
outputColumnNames: key, value
Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: count(value)
keys: key (type: string)
minReductionHashAggr: 0.5
mode: hash
outputColumnNames: _col0, _col1
Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string)
null sort order: z
sort order: +
Map-reduce partition columns: _col0 (type: string)
Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: bigint)
Execution mode: vectorized, llap
LLAP IO: all inputs
Reducer 2
Execution mode: llap
Reduce Operator Tree:
Group By Operator
aggregations: count(VALUE._col0)
keys: KEY._col0 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1
Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: UDFToInteger(_col0) (type: int), UDFToInteger(_col1) (type: int), 'day' (type: string)
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.hive13_dp1
Select Operator
expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string)
outputColumnNames: k1, k2, day
Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: min(k1), max(k1), count(1), count(k1), compute_bit_vector(k1, 'hll'), min(k2), max(k2), count(k2), compute_bit_vector(k2, 'hll')
keys: day (type: string)
minReductionHashAggr: 0.99
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
Statistics: Num rows: 1 Data size: 415 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string)
null sort order: z
sort order: +
Map-reduce partition columns: _col0 (type: string)
Statistics: Num rows: 1 Data size: 415 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: int), _col8 (type: bigint), _col9 (type: binary)
Reducer 3
Execution mode: llap
Reduce Operator Tree:
Group By Operator
aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8)
keys: KEY._col0 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
Statistics: Num rows: 1 Data size: 415 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: 'LONG' (type: string), UDFToLong(_col1) (type: bigint), UDFToLong(_col2) (type: bigint), (_col3 - _col4) (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'LONG' (type: string), UDFToLong(_col6) (type: bigint), UDFToLong(_col7) (type: bigint), (_col3 - _col8) (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), _col0 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
Statistics: Num rows: 1 Data size: 615 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
Statistics: Num rows: 1 Data size: 615 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-2
Dependency Collection
Stage: Stage-0
Move Operator
tables:
partition:
day
replace: true
table:
input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.hive13_dp1
Stage: Stage-3
Stats Work
Basic Stats Work:
Column Stats Desc:
Columns: k1, k2
Column Types: int, int
Table: default.hive13_dp1
PREHOOK: query: insert overwrite table `hive13_dp1` partition(`day`)
select
key k1,
count(value) k2,
"day" `day`
from src
group by "day", key
PREHOOK: type: QUERY
PREHOOK: Input: default@src
PREHOOK: Output: default@hive13_dp1
POSTHOOK: query: insert overwrite table `hive13_dp1` partition(`day`)
select
key k1,
count(value) k2,
"day" `day`
from src
group by "day", key
POSTHOOK: type: QUERY
POSTHOOK: Input: default@src
POSTHOOK: Output: default@hive13_dp1
POSTHOOK: Output: default@hive13_dp1@day=day
POSTHOOK: Lineage: hive13_dp1 PARTITION(day=day).k1 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: hive13_dp1 PARTITION(day=day).k2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
PREHOOK: query: select * from hive13_dp1 order by k1, k2 limit 5
PREHOOK: type: QUERY
PREHOOK: Input: default@hive13_dp1
PREHOOK: Input: default@hive13_dp1@day=day
#### A masked pattern was here ####
POSTHOOK: query: select * from hive13_dp1 order by k1, k2 limit 5
POSTHOOK: type: QUERY
POSTHOOK: Input: default@hive13_dp1
POSTHOOK: Input: default@hive13_dp1@day=day
#### A masked pattern was here ####
0 3 day
2 1 day
4 1 day
5 3 day
8 1 day
PREHOOK: query: drop table hive13_dp1
PREHOOK: type: DROPTABLE
PREHOOK: Input: default@hive13_dp1
PREHOOK: Output: default@hive13_dp1
POSTHOOK: query: drop table hive13_dp1
POSTHOOK: type: DROPTABLE
POSTHOOK: Input: default@hive13_dp1
POSTHOOK: Output: default@hive13_dp1