blob: 9a1afefea2157f9990b896755fb8e6b84c15d17f [file] [log] [blame]
--! qt:dataset:part
set hive.mapred.mode=nonstrict;
set hive.cbo.enable=true;
set hive.cbo.returnpath.hiveop=true;
set hive.exec.check.crossproducts=false;
set mapred.reduce.tasks=4;
-- SORT_QUERY_RESULTS
-- 1. testWindowing
select p_mfgr, p_name, p_size,
rank() over(distribute by p_mfgr sort by p_name) as r,
dense_rank() over(distribute by p_mfgr sort by p_name) as dr,
round(sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows between unbounded preceding and current row),2) as s1
from part
;
-- 2. testGroupByWithPartitioning
select p_mfgr, p_name, p_size,
min(p_retailprice),
rank() over(distribute by p_mfgr sort by p_name)as r,
dense_rank() over(distribute by p_mfgr sort by p_name) as dr,
p_size, p_size - lag(p_size,1,p_size) over(distribute by p_mfgr sort by p_name) as deltaSz
from part
group by p_mfgr, p_name, p_size
;
-- 3. testGroupByHavingWithSWQ
select p_mfgr, p_name, p_size, min(p_retailprice),
rank() over(distribute by p_mfgr sort by p_name) as r,
dense_rank() over(distribute by p_mfgr sort by p_name) as dr,
p_size, p_size - lag(p_size,1,p_size) over(distribute by p_mfgr sort by p_name) as deltaSz
from part
group by p_mfgr, p_name, p_size
having p_size > 0
;
-- 4. testCount
select p_mfgr, p_name,
count(p_size) over(distribute by p_mfgr sort by p_name) as cd
from part
;
-- 5. testCountWithWindowingUDAF
select p_mfgr, p_name,
rank() over(distribute by p_mfgr sort by p_name) as r,
dense_rank() over(distribute by p_mfgr sort by p_name) as dr,
count(p_size) over(distribute by p_mfgr sort by p_name) as cd,
p_retailprice, round(sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows between unbounded preceding and current row),2) as s1,
p_size, p_size - lag(p_size,1,p_size) over(distribute by p_mfgr sort by p_name) as deltaSz
from part
;
-- 6. testCountInSubQ
select sub1.r, sub1.dr, sub1.cd, sub1.s1, sub1.deltaSz
from (select p_mfgr, p_name,
rank() over(distribute by p_mfgr sort by p_name) as r,
dense_rank() over(distribute by p_mfgr sort by p_name) as dr,
count(p_size) over(distribute by p_mfgr sort by p_name) as cd,
p_retailprice, round(sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows between unbounded preceding and current row),2) as s1,
p_size, p_size - lag(p_size,1,p_size) over(distribute by p_mfgr sort by p_name) as deltaSz
from part
) sub1;
-- 7. testJoinWithWindowingAndPTF
select abc.p_mfgr, abc.p_name,
rank() over(distribute by abc.p_mfgr sort by abc.p_name) as r,
dense_rank() over(distribute by abc.p_mfgr sort by abc.p_name) as dr,
abc.p_retailprice, round(sum(abc.p_retailprice) over (distribute by abc.p_mfgr sort by abc.p_name rows between unbounded preceding and current row),2) as s1,
abc.p_size, abc.p_size - lag(abc.p_size,1,abc.p_size) over(distribute by abc.p_mfgr sort by abc.p_name) as deltaSz
from noop(on part
partition by p_mfgr
order by p_name
) abc join part p1 on abc.p_partkey = p1.p_partkey
;
-- 8. testMixedCaseAlias
select p_mfgr, p_name, p_size,
rank() over(distribute by p_mfgr sort by p_name, p_size desc) as R
from part
;
-- 9. testHavingWithWindowingNoGBY
select p_mfgr, p_name, p_size,
rank() over(distribute by p_mfgr sort by p_name) as r,
dense_rank() over(distribute by p_mfgr sort by p_name) as dr,
round(sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows between unbounded preceding and current row),2) as s1
from part
;
-- 10. testHavingWithWindowingCondRankNoGBY
select p_mfgr, p_name, p_size,
rank() over(distribute by p_mfgr sort by p_name) as r,
dense_rank() over(distribute by p_mfgr sort by p_name) as dr,
round(sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows between unbounded preceding and current row),2) as s1
from part
;
-- 11. testFirstLast
select p_mfgr,p_name, p_size,
sum(p_size) over (distribute by p_mfgr sort by p_name rows between current row and current row) as s2,
first_value(p_size) over w1 as f,
last_value(p_size, false) over w1 as l
from part
window w1 as (distribute by p_mfgr sort by p_name rows between 2 preceding and 2 following);
-- 12. testFirstLastWithWhere
select p_mfgr,p_name, p_size,
rank() over(distribute by p_mfgr sort by p_name) as r,
sum(p_size) over (distribute by p_mfgr sort by p_name rows between current row and current row) as s2,
first_value(p_size) over w1 as f,
last_value(p_size, false) over w1 as l
from part
where p_mfgr = 'Manufacturer#3'
window w1 as (distribute by p_mfgr sort by p_name rows between 2 preceding and 2 following);
-- 13. testSumWindow
select p_mfgr,p_name, p_size,
sum(p_size) over w1 as s1,
sum(p_size) over (distribute by p_mfgr sort by p_name rows between current row and current row) as s2
from part
window w1 as (distribute by p_mfgr sort by p_name rows between 2 preceding and 2 following);
-- 14. testNoSortClause
select p_mfgr,p_name, p_size,
rank() over(distribute by p_mfgr sort by p_name) as r, dense_rank() over(distribute by p_mfgr sort by p_name) as dr
from part
window w1 as (distribute by p_mfgr sort by p_name rows between 2 preceding and 2 following);
-- 15. testExpressions
select p_mfgr,p_name, p_size,
rank() over(distribute by p_mfgr sort by p_name) as r,
dense_rank() over(distribute by p_mfgr sort by p_name) as dr,
cume_dist() over(distribute by p_mfgr sort by p_name) as cud,
percent_rank() over(distribute by p_mfgr sort by p_name) as pr,
ntile(3) over(distribute by p_mfgr sort by p_name) as nt,
count(p_size) over(distribute by p_mfgr sort by p_name) as ca,
avg(p_size) over(distribute by p_mfgr sort by p_name) as avg,
stddev(p_size) over(distribute by p_mfgr sort by p_name) as st,
first_value(p_size % 5) over(distribute by p_mfgr sort by p_name) as fv,
last_value(p_size) over(distribute by p_mfgr sort by p_name) as lv,
first_value(p_size) over w1 as fvW1
from part
window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following);
-- 16. testMultipleWindows
select p_mfgr,p_name, p_size,
rank() over(distribute by p_mfgr sort by p_name) as r,
dense_rank() over(distribute by p_mfgr sort by p_name) as dr,
cume_dist() over(distribute by p_mfgr sort by p_name) as cud,
sum(p_size) over (distribute by p_mfgr sort by p_name range between unbounded preceding and current row) as s1,
sum(p_size) over (distribute by p_mfgr sort by p_size range between 5 preceding and current row) as s2,
first_value(p_size) over w1 as fv1
from part
window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following);
-- 17. testCountStar
select p_mfgr,p_name, p_size,
count(*) over(distribute by p_mfgr sort by p_name ) as c,
count(p_size) over(distribute by p_mfgr sort by p_name) as ca,
first_value(p_size) over w1 as fvW1
from part
window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following);
-- 18. testUDAFs
select p_mfgr,p_name, p_size,
round(sum(p_retailprice) over w1,2) as s,
min(p_retailprice) over w1 as mi,
max(p_retailprice) over w1 as ma,
round(avg(p_retailprice) over w1,2) as ag
from part
window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following);
-- 19. testUDAFsWithGBY
select p_mfgr,p_name, p_size, p_retailprice,
round(sum(p_retailprice) over w1,2) as s,
min(p_retailprice) as mi ,
max(p_retailprice) as ma ,
round(avg(p_retailprice) over w1,2) as ag
from part
group by p_mfgr,p_name, p_size, p_retailprice
window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following);
-- 20. testSTATs
select p_mfgr,p_name, p_size,
stddev(p_retailprice) over w1 as sdev,
stddev_pop(p_retailprice) over w1 as sdev_pop,
collect_set(p_size) over w1 as uniq_size,
variance(p_retailprice) over w1 as var,
round(corr(p_size, p_retailprice) over w1,5) as cor,
covar_pop(p_size, p_retailprice) over w1 as covarp
from part
window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following);
-- 21. testDISTs
select p_mfgr,p_name, p_size,
histogram_numeric(p_retailprice, 5) over w1 as hist,
percentile(p_partkey, 0.5) over w1 as per,
row_number() over(distribute by p_mfgr sort by p_mfgr, p_name) as rn
from part
window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following);
set hive.cbo.returnpath.hiveop=false;
-- 22. testViewAsTableInputWithWindowing
create view IF NOT EXISTS mfgr_price_view_n4 as
select p_mfgr, p_brand,
round(sum(p_retailprice),2) as s
from part
group by p_mfgr, p_brand;
set hive.cbo.returnpath.hiveop=true;
select *
from (
select p_mfgr, p_brand, s,
round(sum(s) over w1 , 2) as s1
from mfgr_price_view_n4
window w1 as (distribute by p_mfgr sort by p_mfgr )
) sq
order by p_mfgr, p_brand;
select p_mfgr, p_brand, s,
round(sum(s) over w1 ,2) as s1
from mfgr_price_view_n4
window w1 as (distribute by p_mfgr sort by p_brand rows between 2 preceding and current row);
set hive.cbo.returnpath.hiveop=false;
-- 23. testCreateViewWithWindowingQuery
create view IF NOT EXISTS mfgr_brand_price_view_n1 as
select p_mfgr, p_brand,
round(sum(p_retailprice) over w1,2) as s
from part
window w1 as (distribute by p_mfgr sort by p_name rows between 2 preceding and current row);
set hive.cbo.returnpath.hiveop=true ;
select * from mfgr_brand_price_view_n1;
-- 24. testLateralViews
select p_mfgr, p_name,
lv_col, p_size, sum(p_size) over w1 as s
from (select p_mfgr, p_name, p_size, array(1,2,3) arr from part) p
lateral view explode(arr) part_lv as lv_col
window w1 as (distribute by p_mfgr sort by p_size, lv_col rows between 2 preceding and current row);
-- 25. testMultipleInserts3SWQs
CREATE TABLE part_1_n1(
p_mfgr STRING,
p_name STRING,
p_size INT,
r INT,
dr INT,
s DOUBLE);
CREATE TABLE part_2_n1(
p_mfgr STRING,
p_name STRING,
p_size INT,
r INT,
dr INT,
cud INT,
s2 DOUBLE,
fv1 INT);
CREATE TABLE part_3_n1(
p_mfgr STRING,
p_name STRING,
p_size INT,
c INT,
ca INT,
fv INT);
from part
INSERT OVERWRITE TABLE part_1_n1
select p_mfgr, p_name, p_size,
rank() over(distribute by p_mfgr sort by p_name ) as r,
dense_rank() over(distribute by p_mfgr sort by p_name ) as dr,
round(sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows between unbounded preceding and current row),2) as s
INSERT OVERWRITE TABLE part_2_n1
select p_mfgr,p_name, p_size,
rank() over(distribute by p_mfgr sort by p_name) as r,
dense_rank() over(distribute by p_mfgr sort by p_name) as dr,
cume_dist() over(distribute by p_mfgr sort by p_name) as cud,
round(sum(p_size) over (distribute by p_mfgr sort by p_size range between 5 preceding and current row),1) as s2,
first_value(p_size) over w1 as fv1
window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following)
INSERT OVERWRITE TABLE part_3_n1
select p_mfgr,p_name, p_size,
count(*) over(distribute by p_mfgr sort by p_name) as c,
count(p_size) over(distribute by p_mfgr sort by p_name) as ca,
first_value(p_size) over w1 as fv
window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following);
select * from part_1_n1;
select * from part_2_n1;
select * from part_3_n1;
-- 26. testGroupByHavingWithSWQAndAlias
select p_mfgr, p_name, p_size, min(p_retailprice) as mi,
rank() over(distribute by p_mfgr sort by p_name) as r,
dense_rank() over(distribute by p_mfgr sort by p_name) as dr,
p_size, p_size - lag(p_size,1,p_size) over(distribute by p_mfgr sort by p_name) as deltaSz
from part
group by p_mfgr, p_name, p_size
having p_size > 0
;
-- 27. testMultipleRangeWindows
select p_mfgr,p_name, p_size,
sum(p_size) over (distribute by p_mfgr sort by p_size range between 10 preceding and current row) as s2,
sum(p_size) over (distribute by p_mfgr sort by p_size range between current row and 10 following ) as s1
from part
window w1 as (rows between 2 preceding and 2 following);
-- 28. testPartOrderInUDAFInvoke
select p_mfgr, p_name, p_size,
sum(p_size) over (partition by p_mfgr order by p_name rows between 2 preceding and 2 following) as s
from part;
-- 29. testPartOrderInWdwDef
select p_mfgr, p_name, p_size,
sum(p_size) over w1 as s
from part
window w1 as (partition by p_mfgr order by p_name rows between 2 preceding and 2 following);
-- 30. testDefaultPartitioningSpecRules
select p_mfgr, p_name, p_size,
sum(p_size) over w1 as s,
sum(p_size) over w2 as s2
from part
window w1 as (distribute by p_mfgr sort by p_name rows between 2 preceding and 2 following),
w2 as (partition by p_mfgr order by p_name);
-- 31. testWindowCrossReference
select p_mfgr, p_name, p_size,
sum(p_size) over w1 as s1,
sum(p_size) over w2 as s2
from part
window w1 as (partition by p_mfgr order by p_name range between 2 preceding and 2 following),
w2 as w1;
-- 32. testWindowInheritance
select p_mfgr, p_name, p_size,
sum(p_size) over w1 as s1,
sum(p_size) over w2 as s2
from part
window w1 as (partition by p_mfgr order by p_name range between 2 preceding and 2 following),
w2 as (w1 rows between unbounded preceding and current row);
-- 33. testWindowForwardReference
select p_mfgr, p_name, p_size,
sum(p_size) over w1 as s1,
sum(p_size) over w2 as s2,
sum(p_size) over w3 as s3
from part
window w1 as (distribute by p_mfgr sort by p_name range between 2 preceding and 2 following),
w2 as w3,
w3 as (distribute by p_mfgr sort by p_name range between unbounded preceding and current row);
-- 34. testWindowDefinitionPropagation
select p_mfgr, p_name, p_size,
sum(p_size) over w1 as s1,
sum(p_size) over w2 as s2,
sum(p_size) over (w3 rows between 2 preceding and 2 following) as s3
from part
window w1 as (distribute by p_mfgr sort by p_name range between 2 preceding and 2 following),
w2 as w3,
w3 as (distribute by p_mfgr sort by p_name range between unbounded preceding and current row);
-- 35. testDistinctWithWindowing
select DISTINCT p_mfgr, p_name, p_size,
sum(p_size) over w1 as s
from part
window w1 as (distribute by p_mfgr sort by p_name rows between 2 preceding and 2 following);
-- 36. testRankWithPartitioning
select p_mfgr, p_name, p_size,
rank() over (partition by p_mfgr order by p_name ) as r
from part;
-- 37. testPartitioningVariousForms
select p_mfgr,
round(sum(p_retailprice) over (partition by p_mfgr order by p_mfgr),2) as s1,
min(p_retailprice) over (partition by p_mfgr) as s2,
max(p_retailprice) over (distribute by p_mfgr sort by p_mfgr) as s3,
round(avg(p_retailprice) over (distribute by p_mfgr),2) as s4,
count(p_retailprice) over (cluster by p_mfgr ) as s5
from part;
-- 38. testPartitioningVariousForms2
select p_mfgr, p_name, p_size,
round(sum(p_retailprice) over (partition by p_mfgr, p_name order by p_mfgr, p_name rows between unbounded preceding and current row),2) as s1,
min(p_retailprice) over (distribute by p_mfgr, p_name sort by p_mfgr, p_name rows between unbounded preceding and current row) as s2,
max(p_retailprice) over (partition by p_mfgr, p_name order by p_name) as s3
from part;
-- 39. testUDFOnOrderCols
select p_mfgr, p_type, substr(p_type, 2) as short_ptype,
rank() over (partition by p_mfgr order by substr(p_type, 2)) as r
from part;
-- 40. testNoBetweenForRows
select p_mfgr, p_name, p_size,
round(sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows unbounded preceding),2) as s1
from part ;
-- 41. testNoBetweenForRange
select p_mfgr, p_name, p_size,
round(sum(p_retailprice) over (distribute by p_mfgr sort by p_size range unbounded preceding),2) as s1
from part ;
-- 42. testUnboundedFollowingForRows
select p_mfgr, p_name, p_size,
round(sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows between current row and unbounded following),2) as s1
from part ;
-- 43. testUnboundedFollowingForRange
select p_mfgr, p_name, p_size,
round(sum(p_retailprice) over (distribute by p_mfgr sort by p_size range between current row and unbounded following),2) as s1
from part ;
-- 44. testOverNoPartitionSingleAggregate
select p_name, p_retailprice,
round(avg(p_retailprice) over(),2)
from part
order by p_name;
-- 45. empty partition test
select p_mfgr,
sum(p_size) over (partition by p_mfgr order by p_size rows between unbounded preceding and current row)
from part
where p_mfgr = 'Manufacturer#6'
;
-- 46. window sz is same as partition sz
select p_retailprice, round(avg(p_retailprice) over (partition by p_mfgr order by p_name rows between current row and 6 following),2),
round(sum(p_retailprice) over (partition by p_mfgr order by p_name rows between current row and 6 following),2)
from part
where p_mfgr='Manufacturer#1';
-- 47. empty partition
select sum(p_size) over (partition by p_mfgr )
from part where p_mfgr = 'm1';