blob: 1a78bf0677f12f79f679e02a1376c78d240db1ab [file] [log] [blame]
SET hive.vectorized.execution.enabled=true;
set hive.vectorized.execution.ptf.enabled=true;
set hive.fetch.task.conversion=none;
-- SORT_QUERY_RESULTS
DROP TABLE part_staging;
DROP TABLE part_orc;
-- NOTE: This test is a copy of ptf.
-- NOTE: We cannot vectorize "pure" table functions (e.g. NOOP) -- given their blackbox nature. So only queries without table functions and
-- NOTE: with windowing will be vectorized.
-- data setup
CREATE TABLE part_staging(
p_partkey INT,
p_name STRING,
p_mfgr STRING,
p_brand STRING,
p_type STRING,
p_size INT,
p_container STRING,
p_retailprice DOUBLE,
p_comment STRING
);
LOAD DATA LOCAL INPATH '../../data/files/tpch/tiny/part.tbl.bz2' overwrite into table part_staging;
CREATE TABLE part_orc(
p_partkey INT,
p_name STRING,
p_mfgr STRING,
p_brand STRING,
p_type STRING,
p_size INT,
p_container STRING,
p_retailprice DOUBLE,
p_comment STRING
) STORED AS ORC;
DESCRIBE EXTENDED part_orc;
insert into table part_orc select * from part_staging;
--1. test1
explain vectorization detail
select p_mfgr, p_name, p_size,
rank() over (partition by p_mfgr order by p_name) as r,
dense_rank() over (partition by p_mfgr order by p_name) as dr,
round(sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row),2) as s1
from noop(on part_orc
partition by p_mfgr
order by p_name
);
select p_mfgr, p_name, p_size,
rank() over (partition by p_mfgr order by p_name) as r,
dense_rank() over (partition by p_mfgr order by p_name) as dr,
sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1
from noop(on part_orc
partition by p_mfgr
order by p_name
);
-- 2. testJoinWithNoop
explain vectorization detail
select p_mfgr, p_name,
p_size, p_size - lag(p_size,1,p_size) over (partition by p_mfgr order by p_name) as deltaSz
from noop (on (select p1.* from part_orc p1 join part_orc p2 on p1.p_partkey = p2.p_partkey) j
distribute by j.p_mfgr
sort by j.p_name)
;
select p_mfgr, p_name,
p_size, p_size - lag(p_size,1,p_size) over (partition by p_mfgr order by p_name) as deltaSz
from noop (on (select p1.* from part_orc p1 join part_orc p2 on p1.p_partkey = p2.p_partkey) j
distribute by j.p_mfgr
sort by j.p_name)
;
-- 3. testOnlyPTF
explain vectorization detail
select p_mfgr, p_name, p_size
from noop(on part_orc
partition by p_mfgr
order by p_name);
select p_mfgr, p_name, p_size
from noop(on part_orc
partition by p_mfgr
order by p_name);
-- 4. testPTFAlias
explain vectorization detail
select p_mfgr, p_name, p_size,
rank() over (partition by p_mfgr order by p_name) as r,
dense_rank() over (partition by p_mfgr order by p_name) as dr,
round(sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row),2) as s1
from noop(on part_orc
partition by p_mfgr
order by p_name
) abc;
select p_mfgr, p_name, p_size,
rank() over (partition by p_mfgr order by p_name) as r,
dense_rank() over (partition by p_mfgr order by p_name) as dr,
round(sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row),2) as s1
from noop(on part_orc
partition by p_mfgr
order by p_name
) abc;
-- 5. testPTFAndWhereWithWindowing
explain vectorization detail
select p_mfgr, p_name, p_size,
rank() over (partition by p_mfgr order by p_name) as r,
dense_rank() over (partition by p_mfgr order by p_name) as dr,
p_size, p_size - lag(p_size,1,p_size) over (partition by p_mfgr order by p_name) as deltaSz
from noop(on part_orc
partition by p_mfgr
order by p_name
)
;
select p_mfgr, p_name, p_size,
rank() over (partition by p_mfgr order by p_name) as r,
dense_rank() over (partition by p_mfgr order by p_name) as dr,
p_size, p_size - lag(p_size,1,p_size) over (partition by p_mfgr order by p_name) as deltaSz
from noop(on part_orc
partition by p_mfgr
order by p_name
)
;
-- 6. testSWQAndPTFAndGBy
explain vectorization detail
select p_mfgr, p_name, p_size,
rank() over (partition by p_mfgr order by p_name) as r,
dense_rank() over (partition by p_mfgr order by p_name) as dr,
p_size, p_size - lag(p_size,1,p_size) over (partition by p_mfgr order by p_name) as deltaSz
from noop(on part_orc
partition by p_mfgr
order by p_name
)
group by p_mfgr, p_name, p_size
;
select p_mfgr, p_name, p_size,
rank() over (partition by p_mfgr order by p_name) as r,
dense_rank() over (partition by p_mfgr order by p_name) as dr,
p_size, p_size - lag(p_size,1,p_size) over (partition by p_mfgr order by p_name) as deltaSz
from noop(on part_orc
partition by p_mfgr
order by p_name
)
group by p_mfgr, p_name, p_size
;
-- 7. testJoin
explain vectorization detail
select abc.*
from noop(on part_orc
partition by p_mfgr
order by p_name
) abc join part_orc p1 on abc.p_partkey = p1.p_partkey;
select abc.*
from noop(on part_orc
partition by p_mfgr
order by p_name
) abc join part_orc p1 on abc.p_partkey = p1.p_partkey;
-- 8. testJoinRight
explain vectorization detail
select abc.*
from part_orc p1 join noop(on part_orc
partition by p_mfgr
order by p_name
) abc on abc.p_partkey = p1.p_partkey;
select abc.*
from part_orc p1 join noop(on part_orc
partition by p_mfgr
order by p_name
) abc on abc.p_partkey = p1.p_partkey;
-- 9. testNoopWithMap
explain vectorization detail
select p_mfgr, p_name, p_size,
rank() over (partition by p_mfgr order by p_name, p_size desc) as r
from noopwithmap(on part_orc
partition by p_mfgr
order by p_name, p_size desc);
select p_mfgr, p_name, p_size,
rank() over (partition by p_mfgr order by p_name, p_size desc) as r
from noopwithmap(on part_orc
partition by p_mfgr
order by p_name, p_size desc);
-- 10. testNoopWithMapWithWindowing
explain vectorization detail
select p_mfgr, p_name, p_size,
rank() over (partition by p_mfgr order by p_name) as r,
dense_rank() over (partition by p_mfgr order by p_name) as dr,
round(sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row),2) as s1
from noopwithmap(on part_orc
partition by p_mfgr
order by p_name);
select p_mfgr, p_name, p_size,
rank() over (partition by p_mfgr order by p_name) as r,
dense_rank() over (partition by p_mfgr order by p_name) as dr,
round(sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row),2) as s1
from noopwithmap(on part_orc
partition by p_mfgr
order by p_name);
-- 11. testHavingWithWindowingPTFNoGBY
explain vectorization detail
select p_mfgr, p_name, p_size,
rank() over (partition by p_mfgr order by p_name) as r,
dense_rank() over (partition by p_mfgr order by p_name) as dr,
round(sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row),2) as s1
from noop(on part_orc
partition by p_mfgr
order by p_name)
;
select p_mfgr, p_name, p_size,
rank() over (partition by p_mfgr order by p_name) as r,
dense_rank() over (partition by p_mfgr order by p_name) as dr,
round(sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row),2) as s1
from noop(on part_orc
partition by p_mfgr
order by p_name)
;
-- 12. testFunctionChain
explain vectorization detail
select p_mfgr, p_name, p_size,
rank() over (partition by p_mfgr order by p_name) as r,
dense_rank() over (partition by p_mfgr order by p_name) as dr,
round(sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row),2) as s1
from noop(on noopwithmap(on noop(on part_orc
partition by p_mfgr
order by p_mfgr, p_name
)));
select p_mfgr, p_name, p_size,
rank() over (partition by p_mfgr order by p_name) as r,
dense_rank() over (partition by p_mfgr order by p_name) as dr,
round(sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row),2) as s1
from noop(on noopwithmap(on noop(on part_orc
partition by p_mfgr
order by p_mfgr, p_name
)));
-- 13. testPTFAndWindowingInSubQ
explain vectorization detail
select p_mfgr, p_name,
sub1.cd, sub1.s1
from (select p_mfgr, p_name,
count(p_size) over (partition by p_mfgr order by p_name) as cd,
p_retailprice,
round(sum(p_retailprice) over w1,2) as s1
from noop(on part_orc
partition by p_mfgr
order by p_name)
window w1 as (partition by p_mfgr order by p_name rows between 2 preceding and 2 following)
) sub1 ;
select p_mfgr, p_name,
sub1.cd, sub1.s1
from (select p_mfgr, p_name,
count(p_size) over (partition by p_mfgr order by p_name) as cd,
p_retailprice,
round(sum(p_retailprice) over w1,2) as s1
from noop(on part_orc
partition by p_mfgr
order by p_name)
window w1 as (partition by p_mfgr order by p_name rows between 2 preceding and 2 following)
) sub1 ;
-- 14. testPTFJoinWithWindowingWithCount
explain vectorization detail
select abc.p_mfgr, abc.p_name,
rank() over (distribute by abc.p_mfgr sort by abc.p_name) as r,
dense_rank() over (distribute by abc.p_mfgr sort by abc.p_name) as dr,
count(abc.p_name) over (distribute by abc.p_mfgr sort by abc.p_name) as cd,
abc.p_retailprice, round(sum(abc.p_retailprice) over (distribute by abc.p_mfgr sort by abc.p_name rows between unbounded preceding and current row),2) as s1,
abc.p_size, abc.p_size - lag(abc.p_size,1,abc.p_size) over (distribute by abc.p_mfgr sort by abc.p_name) as deltaSz
from noop(on part_orc
partition by p_mfgr
order by p_name
) abc join part_orc p1 on abc.p_partkey = p1.p_partkey
;
select abc.p_mfgr, abc.p_name,
rank() over (distribute by abc.p_mfgr sort by abc.p_name) as r,
dense_rank() over (distribute by abc.p_mfgr sort by abc.p_name) as dr,
count(abc.p_name) over (distribute by abc.p_mfgr sort by abc.p_name) as cd,
abc.p_retailprice, round(sum(abc.p_retailprice) over (distribute by abc.p_mfgr sort by abc.p_name rows between unbounded preceding and current row),2) as s1,
abc.p_size, abc.p_size - lag(abc.p_size,1,abc.p_size) over (distribute by abc.p_mfgr sort by abc.p_name) as deltaSz
from noop(on part_orc
partition by p_mfgr
order by p_name
) abc join part_orc p1 on abc.p_partkey = p1.p_partkey
;
-- 15. testDistinctInSelectWithPTF
explain vectorization detail
select DISTINCT p_mfgr, p_name, p_size
from noop(on part_orc
partition by p_mfgr
order by p_name);
select DISTINCT p_mfgr, p_name, p_size
from noop(on part_orc
partition by p_mfgr
order by p_name);
-- 16. testViewAsTableInputToPTF
create view IF NOT EXISTS mfgr_price_view_n0 as
select p_mfgr, p_brand,
round(sum(p_retailprice),2) as s
from part_orc
group by p_mfgr, p_brand;
explain vectorization detail
select p_mfgr, p_brand, s,
round(sum(s) over w1,2) as s1
from noop(on mfgr_price_view_n0
partition by p_mfgr
order by p_mfgr)
window w1 as ( partition by p_mfgr order by p_brand rows between 2 preceding and current row);
select p_mfgr, p_brand, s,
round(sum(s) over w1,2) as s1
from noop(on mfgr_price_view_n0
partition by p_mfgr
order by p_mfgr)
window w1 as ( partition by p_mfgr order by p_brand rows between 2 preceding and current row);
-- 17. testMultipleInserts2SWQsWithPTF
CREATE TABLE part_4(
p_mfgr STRING,
p_name STRING,
p_size INT,
r INT,
dr INT,
s DOUBLE);
CREATE TABLE part_5(
p_mfgr STRING,
p_name STRING,
p_size INT,
s2 INT,
r INT,
dr INT,
cud DOUBLE,
fv1 INT);
explain vectorization detail
from noop(on part_orc
partition by p_mfgr
order by p_name)
INSERT OVERWRITE TABLE part_4 select p_mfgr, p_name, p_size,
rank() over (distribute by p_mfgr sort by p_name) as r,
dense_rank() over (distribute by p_mfgr sort by p_name) as dr,
round(sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows between unbounded preceding and current row),2) as s
INSERT OVERWRITE TABLE part_5 select p_mfgr,p_name, p_size,
round(sum(p_size) over (distribute by p_mfgr sort by p_size range between 5 preceding and current row),1) as s2,
rank() over (distribute by p_mfgr sort by p_mfgr, p_name) as r,
dense_rank() over (distribute by p_mfgr sort by p_mfgr, p_name) as dr,
cume_dist() over (distribute by p_mfgr sort by p_mfgr, p_name) as cud,
first_value(p_size, true) over w1 as fv1
window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following);
from noop(on part_orc
partition by p_mfgr
order by p_name)
INSERT OVERWRITE TABLE part_4 select p_mfgr, p_name, p_size,
rank() over (distribute by p_mfgr sort by p_name) as r,
dense_rank() over (distribute by p_mfgr sort by p_name) as dr,
round(sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows between unbounded preceding and current row),2) as s
INSERT OVERWRITE TABLE part_5 select p_mfgr,p_name, p_size,
round(sum(p_size) over (distribute by p_mfgr sort by p_size range between 5 preceding and current row),1) as s2,
rank() over (distribute by p_mfgr sort by p_mfgr, p_name) as r,
dense_rank() over (distribute by p_mfgr sort by p_mfgr, p_name) as dr,
cume_dist() over (distribute by p_mfgr sort by p_mfgr, p_name) as cud,
first_value(p_size, true) over w1 as fv1
window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following);
select * from part_4;
select * from part_5;
-- 18. testMulti2OperatorsFunctionChainWithMap
explain vectorization detail
select p_mfgr, p_name,
rank() over (partition by p_mfgr,p_name) as r,
dense_rank() over (partition by p_mfgr,p_name) as dr,
p_size, sum(p_size) over (partition by p_mfgr,p_name rows between unbounded preceding and current row) as s1
from noop(on
noopwithmap(on
noop(on
noop(on part_orc
partition by p_mfgr
order by p_mfgr)
)
partition by p_mfgr,p_name
order by p_mfgr,p_name)
partition by p_mfgr,p_name
order by p_mfgr,p_name) ;
select p_mfgr, p_name,
rank() over (partition by p_mfgr,p_name) as r,
dense_rank() over (partition by p_mfgr,p_name) as dr,
p_size, sum(p_size) over (partition by p_mfgr,p_name rows between unbounded preceding and current row) as s1
from noop(on
noopwithmap(on
noop(on
noop(on part_orc
partition by p_mfgr
order by p_mfgr)
)
partition by p_mfgr,p_name
order by p_mfgr,p_name)
partition by p_mfgr,p_name
order by p_mfgr,p_name) ;
-- 19. testMulti3OperatorsFunctionChain
explain vectorization detail
select p_mfgr, p_name,
rank() over (partition by p_mfgr order by p_name) as r,
dense_rank() over (partition by p_mfgr order by p_name) as dr,
p_size, sum(p_size) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1
from noop(on
noop(on
noop(on
noop(on part_orc
partition by p_mfgr
order by p_mfgr)
)
partition by p_mfgr,p_name
order by p_mfgr,p_name)
partition by p_mfgr
order by p_mfgr ) ;
select p_mfgr, p_name,
rank() over (partition by p_mfgr order by p_name) as r,
dense_rank() over (partition by p_mfgr order by p_name) as dr,
p_size, sum(p_size) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1
from noop(on
noop(on
noop(on
noop(on part_orc
partition by p_mfgr
order by p_mfgr)
)
partition by p_mfgr,p_name
order by p_mfgr,p_name)
partition by p_mfgr
order by p_mfgr ) ;
-- 20. testMultiOperatorChainWithNoWindowing
explain vectorization detail
select p_mfgr, p_name,
rank() over (partition by p_mfgr order by p_name) as r,
dense_rank() over (partition by p_mfgr order by p_name) as dr,
p_size, sum(p_size) over (partition by p_mfgr order by p_name) as s1
from noop(on
noop(on
noop(on
noop(on part_orc
partition by p_mfgr,p_name
order by p_mfgr,p_name)
)
partition by p_mfgr
order by p_mfgr));
select p_mfgr, p_name,
rank() over (partition by p_mfgr order by p_name) as r,
dense_rank() over (partition by p_mfgr order by p_name) as dr,
p_size, sum(p_size) over (partition by p_mfgr order by p_name) as s1
from noop(on
noop(on
noop(on
noop(on part_orc
partition by p_mfgr,p_name
order by p_mfgr,p_name)
)
partition by p_mfgr
order by p_mfgr));
-- 21. testMultiOperatorChainEndsWithNoopMap
explain vectorization detail
select p_mfgr, p_name,
rank() over (partition by p_mfgr,p_name) as r,
dense_rank() over (partition by p_mfgr,p_name) as dr,
p_size, sum(p_size) over (partition by p_mfgr,p_name rows between unbounded preceding and current row) as s1
from noopwithmap(on
noop(on
noop(on
noop(on part_orc
partition by p_mfgr,p_name
order by p_mfgr,p_name)
)
partition by p_mfgr
order by p_mfgr)
partition by p_mfgr,p_name
order by p_mfgr,p_name);
select p_mfgr, p_name,
rank() over (partition by p_mfgr,p_name) as r,
dense_rank() over (partition by p_mfgr,p_name) as dr,
p_size, sum(p_size) over (partition by p_mfgr,p_name rows between unbounded preceding and current row) as s1
from noopwithmap(on
noop(on
noop(on
noop(on part_orc
partition by p_mfgr,p_name
order by p_mfgr,p_name)
)
partition by p_mfgr
order by p_mfgr)
partition by p_mfgr,p_name
order by p_mfgr,p_name);
-- 22. testMultiOperatorChainWithDiffPartitionForWindow1
explain vectorization detail
select p_mfgr, p_name,
rank() over (partition by p_mfgr,p_name order by p_mfgr,p_name) as r,
dense_rank() over (partition by p_mfgr,p_name order by p_mfgr,p_name) as dr,
p_size,
sum(p_size) over (partition by p_mfgr,p_name order by p_mfgr,p_name rows between unbounded preceding and current row) as s1,
sum(p_size) over (partition by p_mfgr,p_name order by p_mfgr,p_name rows between unbounded preceding and current row) as s2
from noop(on
noopwithmap(on
noop(on part_orc
partition by p_mfgr, p_name
order by p_mfgr, p_name)
partition by p_mfgr
order by p_mfgr
));
select p_mfgr, p_name,
rank() over (partition by p_mfgr,p_name order by p_mfgr,p_name) as r,
dense_rank() over (partition by p_mfgr,p_name order by p_mfgr,p_name) as dr,
p_size,
sum(p_size) over (partition by p_mfgr,p_name order by p_mfgr,p_name rows between unbounded preceding and current row) as s1,
sum(p_size) over (partition by p_mfgr,p_name order by p_mfgr,p_name rows between unbounded preceding and current row) as s2
from noop(on
noopwithmap(on
noop(on part_orc
partition by p_mfgr, p_name
order by p_mfgr, p_name)
partition by p_mfgr
order by p_mfgr
));
-- 23. testMultiOperatorChainWithDiffPartitionForWindow2
explain vectorization detail
select p_mfgr, p_name,
rank() over (partition by p_mfgr order by p_name) as r,
dense_rank() over (partition by p_mfgr order by p_name) as dr,
p_size,
sum(p_size) over (partition by p_mfgr order by p_name range between unbounded preceding and current row) as s1,
sum(p_size) over (partition by p_mfgr order by p_name range between unbounded preceding and current row) as s2
from noopwithmap(on
noop(on
noop(on part_orc
partition by p_mfgr, p_name
order by p_mfgr, p_name)
));
select p_mfgr, p_name,
rank() over (partition by p_mfgr order by p_name) as r,
dense_rank() over (partition by p_mfgr order by p_name) as dr,
p_size,
sum(p_size) over (partition by p_mfgr order by p_name range between unbounded preceding and current row) as s1,
sum(p_size) over (partition by p_mfgr order by p_name range between unbounded preceding and current row) as s2
from noopwithmap(on
noop(on
noop(on part_orc
partition by p_mfgr, p_name
order by p_mfgr, p_name)
));