blob: 72d8288917e2ea7262c21f740c90a7ca2110ac0a [file] [log] [blame]
====
---- QUERY
select year, month,
count(int_col) over (partition by year, month),
avg(int_col) over (partition by year, month),
avg(timestamp_col) over (partition by year, month),
min(string_col) over (partition by year, month),
max(string_col) over (partition by year, month)
from alltypessmall
where id % 4 = 0 and month != 1;
---- RESULTS: VERIFY_IS_EQUAL_SORTED
2009,2,6,4.666666666666667,2009-02-01 20:13:00.541666746,'1','9'
2009,2,6,4.666666666666667,2009-02-01 20:13:00.541666746,'1','9'
2009,2,6,4.666666666666667,2009-02-01 20:13:00.541666746,'1','9'
2009,2,6,4.666666666666667,2009-02-01 20:13:00.541666746,'1','9'
2009,2,6,4.666666666666667,2009-02-01 20:13:00.541666746,'1','9'
2009,2,6,4.666666666666667,2009-02-01 20:13:00.541666746,'1','9'
2009,3,6,3.666666666666667,2009-03-01 20:12:00.475000143,'0','8'
2009,3,6,3.666666666666667,2009-03-01 20:12:00.475000143,'0','8'
2009,3,6,3.666666666666667,2009-03-01 20:12:00.475000143,'0','8'
2009,3,6,3.666666666666667,2009-03-01 20:12:00.475000143,'0','8'
2009,3,6,3.666666666666667,2009-03-01 20:12:00.475000143,'0','8'
2009,3,6,3.666666666666667,2009-03-01 20:12:00.475000143,'0','8'
2009,4,6,4.333333333333333,2009-04-01 16:11:00.416666746,'1','9'
2009,4,6,4.333333333333333,2009-04-01 16:11:00.416666746,'1','9'
2009,4,6,4.333333333333333,2009-04-01 16:11:00.416666746,'1','9'
2009,4,6,4.333333333333333,2009-04-01 16:11:00.416666746,'1','9'
2009,4,6,4.333333333333333,2009-04-01 16:11:00.416666746,'1','9'
2009,4,6,4.333333333333333,2009-04-01 16:11:00.416666746,'1','9'
---- DBAPI_RESULTS: VERIFY_IS_EQUAL_SORTED
2009,2,6,4.666666666666667,2009-02-01 20:13:00.541666,'1','9'
2009,2,6,4.666666666666667,2009-02-01 20:13:00.541666,'1','9'
2009,2,6,4.666666666666667,2009-02-01 20:13:00.541666,'1','9'
2009,2,6,4.666666666666667,2009-02-01 20:13:00.541666,'1','9'
2009,2,6,4.666666666666667,2009-02-01 20:13:00.541666,'1','9'
2009,2,6,4.666666666666667,2009-02-01 20:13:00.541666,'1','9'
2009,3,6,3.666666666666667,2009-03-01 20:12:00.475000,'0','8'
2009,3,6,3.666666666666667,2009-03-01 20:12:00.475000,'0','8'
2009,3,6,3.666666666666667,2009-03-01 20:12:00.475000,'0','8'
2009,3,6,3.666666666666667,2009-03-01 20:12:00.475000,'0','8'
2009,3,6,3.666666666666667,2009-03-01 20:12:00.475000,'0','8'
2009,3,6,3.666666666666667,2009-03-01 20:12:00.475000,'0','8'
2009,4,6,4.333333333333333,2009-04-01 16:11:00.416666,'1','9'
2009,4,6,4.333333333333333,2009-04-01 16:11:00.416666,'1','9'
2009,4,6,4.333333333333333,2009-04-01 16:11:00.416666,'1','9'
2009,4,6,4.333333333333333,2009-04-01 16:11:00.416666,'1','9'
2009,4,6,4.333333333333333,2009-04-01 16:11:00.416666,'1','9'
2009,4,6,4.333333333333333,2009-04-01 16:11:00.416666,'1','9'
---- TYPES
INT, INT, BIGINT, DOUBLE, TIMESTAMP, STRING, STRING
====
---- QUERY
select date_part,
count(date_col) over (partition by date_part),
min(date_col) over (partition by date_part),
max(date_col) over (partition by date_part)
from functional.date_tbl
order by date_part;
---- RESULTS: VERIFY_IS_EQUAL_SORTED
0001-01-01,6,0001-01-01,9999-12-31
0001-01-01,6,0001-01-01,9999-12-31
0001-01-01,6,0001-01-01,9999-12-31
0001-01-01,6,0001-01-01,9999-12-31
0001-01-01,6,0001-01-01,9999-12-31
0001-01-01,6,0001-01-01,9999-12-31
0001-01-01,6,0001-01-01,9999-12-31
1399-06-27,2,2017-11-28,2018-12-31
1399-06-27,2,2017-11-28,2018-12-31
1399-06-27,2,2017-11-28,2018-12-31
2017-11-27,10,0001-06-21,2017-11-28
2017-11-27,10,0001-06-21,2017-11-28
2017-11-27,10,0001-06-21,2017-11-28
2017-11-27,10,0001-06-21,2017-11-28
2017-11-27,10,0001-06-21,2017-11-28
2017-11-27,10,0001-06-21,2017-11-28
2017-11-27,10,0001-06-21,2017-11-28
2017-11-27,10,0001-06-21,2017-11-28
2017-11-27,10,0001-06-21,2017-11-28
2017-11-27,10,0001-06-21,2017-11-28
9999-12-31,2,9999-12-01,9999-12-31
9999-12-31,2,9999-12-01,9999-12-31
---- TYPES
DATE, BIGINT, DATE, DATE
====
---- QUERY
select int_col,
count(int_col) over ()
from alltypessmall
where id % 4 = 0 and month = 1
order by int_col
---- RESULTS
0,7
0,7
2,7
4,7
4,7
6,7
8,7
---- TYPES
INT, BIGINT
====
---- QUERY
select tinyint_col,
count(int_col) over (order by tinyint_col)
from alltypessmall
where month = 1 and tinyint_col < 5
---- RESULTS: VERIFY_IS_EQUAL_SORTED
0,3
0,3
0,3
1,6
1,6
1,6
2,9
2,9
2,9
3,12
3,12
3,12
4,15
4,15
4,15
---- TYPES
TINYINT, BIGINT
====
---- QUERY
# Test ordering expressions having NULL values
select tinyint_col, smallint_col, int_col,
sum(smallint_col) over (order by tinyint_col nulls first)
from alltypesagg
where year = 2010 and month = 1 and day = 4 and id <= 3030 and bool_col = true
order by tinyint_col, id, smallint_col, int_col
---- RESULTS
2,2,2,96
2,12,12,96
2,22,22,96
4,4,4,138
4,14,14,138
4,24,24,138
6,6,6,186
6,16,16,186
6,26,26,186
8,8,8,240
8,18,18,240
8,28,28,240
NULL,NULL,NULL,60
NULL,10,10,60
NULL,20,20,60
NULL,30,30,60
---- TYPES
TINYINT, SMALLINT, INT, BIGINT
====
---- QUERY
# Test partitions and ordering expressions having NULL values for both
# the default window and ROWS windows.
select tinyint_col, smallint_col,
sum(smallint_col) over (partition by tinyint_col order by smallint_col),
sum(smallint_col) over (partition by tinyint_col order by smallint_col
rows between unbounded preceding and 1 following),
sum(smallint_col) over (partition by tinyint_col order by smallint_col
rows between 1 following and 2 following)
from alltypesagg
where year = 2010 and month = 1 and day = 4 and id <= 3200
and (smallint_col < 50 or smallint_col is NULL)
and (tinyint_col = 1 or tinyint_col is NULL)
---- RESULTS: VERIFY_IS_EQUAL_SORTED
1,1,2,2,12
1,1,2,13,22
1,11,24,24,32
1,11,24,45,42
1,21,66,66,52
1,21,66,97,62
1,31,128,128,72
1,31,128,169,82
1,41,210,210,41
1,41,210,210,NULL
NULL,10,20,20,30
NULL,10,20,40,40
NULL,20,60,60,50
NULL,20,60,90,60
NULL,30,120,120,70
NULL,30,120,160,80
NULL,40,200,200,40
NULL,40,200,200,NULL
NULL,NULL,200,200,NULL
NULL,NULL,200,200,NULL
NULL,NULL,200,200,NULL
---- TYPES
TINYINT, SMALLINT, BIGINT, BIGINT, BIGINT
====
---- QUERY
# Test sum() NULL handling with different sliding windows (sum() has special NULL
# handling in Remove()).
select
sum(tinyint_col) over (order by id nulls last rows between 1 preceding and 1 preceding),
sum(tinyint_col) over (order by id nulls last rows between 1 preceding and current row),
sum(tinyint_col) over (order by id nulls last rows between current row and 1 following),
sum(tinyint_col) over (order by id nulls last rows between 1 following and 1 following)
from alltypesagg where id < 30 and (tinyint_col is null or tinyint_col < 6)
---- RESULTS: VERIFY_IS_EQUAL_SORTED
NULL,NULL,NULL,NULL
NULL,NULL,1,1
NULL,1,3,2
1,3,5,3
2,5,7,4
3,7,9,5
4,9,5,NULL
5,5,NULL,NULL
NULL,NULL,1,1
NULL,1,3,2
1,3,5,3
2,5,7,4
3,7,9,5
4,9,5,NULL
5,5,NULL,NULL
NULL,NULL,1,1
NULL,1,3,2
1,3,5,3
2,5,7,4
3,7,9,5
4,9,5,NULL
---- TYPES
BIGINT, BIGINT, BIGINT, BIGINT
====
---- QUERY
# Test multiple partition exprs and multiple order by exprs
select year, month, bool_col, tinyint_col,
count(id) over (partition by year, month order by bool_col, tinyint_col)
from alltypes
where tinyint_col < 2 and month < 3 and (id % 100 < 10)
order by year, month, bool_col, tinyint_col
---- RESULTS
2009,1,false,1,4
2009,1,false,1,4
2009,1,false,1,4
2009,1,false,1,4
2009,1,true,0,8
2009,1,true,0,8
2009,1,true,0,8
2009,1,true,0,8
2009,2,false,1,2
2009,2,false,1,2
2009,2,true,0,4
2009,2,true,0,4
2010,1,false,1,3
2010,1,false,1,3
2010,1,false,1,3
2010,1,true,0,6
2010,1,true,0,6
2010,1,true,0,6
2010,2,false,1,3
2010,2,false,1,3
2010,2,false,1,3
2010,2,true,0,6
2010,2,true,0,6
2010,2,true,0,6
---- TYPES
INT, INT, BOOLEAN, TINYINT, BIGINT
====
---- QUERY
# Same as above but with no ordering and a limit. Only count the number of results
# because they are non-deterministic.
select count(*) from
(select year, month, bool_col, tinyint_col,
count(id) over (partition by year, month order by bool_col, tinyint_col)
from alltypes
where tinyint_col < 2 and month < 3 and (id % 100 < 10)
limit 7) v
---- RESULTS
7
---- TYPES
BIGINT
====
---- QUERY
# Simple ROWS window
select (month % 2), int_col,
sum(int_col) over (partition by (month % 2)
order by int_col rows between unbounded preceding and current row)
from alltypestiny
---- RESULTS: VERIFY_IS_EQUAL_SORTED
0,0,0
0,0,0
0,1,1
0,1,2
1,0,0
1,0,0
1,1,1
1,1,2
---- TYPES
INT, INT, BIGINT
====
---- QUERY
# ROWS windows with an end boundary offset, some before or after all results.
select
count(tinyint_col) over (partition by bool_col order by tinyint_col
rows between unbounded preceding and 1 preceding),
sum(tinyint_col) over (partition by bool_col order by tinyint_col
rows between unbounded preceding and 1 following),
sum(tinyint_col) over (partition by bool_col order by tinyint_col
rows between unbounded preceding and 3 preceding),
sum(tinyint_col) over (partition by bool_col order by tinyint_col
rows between unbounded preceding and 20 following),
sum(tinyint_col) over (partition by bool_col order by tinyint_col
rows between unbounded preceding and 20 preceding)
from alltypesagg where id <= 10 and day = 1
---- RESULTS: VERIFY_IS_EQUAL_SORTED
0,4,NULL,25,NULL
1,9,NULL,25,NULL
2,16,NULL,25,NULL
3,25,1,25,NULL
4,25,4,25,NULL
0,6,NULL,20,NULL
1,12,NULL,20,NULL
2,20,NULL,20,NULL
3,20,2,20,NULL
4,20,6,20,NULL
4,20,12,20,NULL
---- TYPES
BIGINT, BIGINT, BIGINT, BIGINT, BIGINT
====
---- QUERY
# multiple analytics in the same select block
select tinyint_col, int_col, bigint_col,
count(bigint_col)
over(partition by tinyint_col, bigint_col order by int_col desc
rows between unbounded preceding and 1 following),
max(tinyint_col)
over(partition by bigint_col, tinyint_col order by int_col desc
rows between unbounded preceding and 1 following),
# different window
min(int_col)
over(partition by bigint_col, tinyint_col order by int_col desc
rows between unbounded preceding and 2 following),
# different sort order but same partition
max(int_col)
over(partition by bigint_col, tinyint_col order by int_col asc
rows between unbounded preceding and 2 following),
# different partition
sum(int_col)
over(partition by tinyint_col order by int_col desc
rows between unbounded preceding and 2 following)
from functional.alltypesagg
where id < 20
---- RESULTS: VERIFY_IS_EQUAL_SORTED
1,1,10,1,1,1,1,12
1,11,110,1,1,11,11,12
2,2,20,1,2,2,2,14
2,12,120,1,2,12,12,14
3,3,30,1,3,3,3,16
3,13,130,1,3,13,13,16
4,4,40,1,4,4,4,18
4,14,140,1,4,14,14,18
5,5,50,1,5,5,5,20
5,15,150,1,5,15,15,20
6,6,60,1,6,6,6,22
6,16,160,1,6,16,16,22
7,7,70,1,7,7,7,24
7,17,170,1,7,17,17,24
8,8,80,1,8,8,8,26
8,18,180,1,8,18,18,26
9,9,90,1,9,9,9,28
9,19,190,1,9,19,19,28
NULL,10,100,2,NULL,10,10,20
NULL,10,100,2,NULL,10,10,20
NULL,NULL,NULL,0,NULL,NULL,NULL,10
NULL,NULL,NULL,0,NULL,NULL,NULL,20
---- TYPES
TINYINT, INT, BIGINT, BIGINT, TINYINT, INT, INT, BIGINT
====
---- QUERY
# analytics on a grouped aggregation with a final order by
select bool_col,
sum(min(int_col))
over(partition by min(tinyint_col) order by max(int_col)
rows between unbounded preceding and 1 following),
max(sum(bigint_col))
over(partition by min(tinyint_col) order by max(int_col)
rows between unbounded preceding and 1 following),
min(sum(bigint_col))
over(partition by min(tinyint_col) order by sum(int_col)
rows between unbounded preceding and 4 following)
from functional.alltypes
group by 1
order by 1, 2, 3
---- RESULTS
false,1,182500,182500
true,0,146000,146000
---- TYPES
BOOLEAN, BIGINT, BIGINT, BIGINT
====
---- QUERY
# Test for IMPALA-1200
select DISTINCT t1.c1 FROM
(SELECT MAX(smallint_col) OVER (PARTITION BY year ORDER BY id DESC) AS c1
FROM alltypesagg) t1
---- RESULTS
99
---- TYPES
SMALLINT
====
---- QUERY
# Test for IMPALA-1233
with with_1 AS (
SELECT RANK() OVER (ORDER BY t1.id) AS int_col_1
FROM alltypesagg t1)
SELECT COUNT(t1.int_col_1) AS int_col_1
FROM with_1 t1
WHERE t1.int_col_1 IS NULL
---- RESULTS
0
---- TYPES
BIGINT
====
---- QUERY
select tinyint_col, id,
row_number() over (order by tinyint_col, id) total_order,
row_number() over (partition by tinyint_col order by id) partitioned_order,
first_value(id) over (partition by tinyint_col order by id),
first_value(cast(id as string)) over (partition by tinyint_col order by id),
last_value(id) over (partition by tinyint_col order by id),
last_value(cast(id as string)) over (partition by tinyint_col order by id),
last_value(id) over (partition by tinyint_col order by id
rows between unbounded preceding and unbounded following)
from alltypes where id < 25 order by tinyint_col, id
---- RESULTS
0,0,1,1,0,'0',0,'0',20
0,10,2,2,0,'0',10,'10',20
0,20,3,3,0,'0',20,'20',20
1,1,4,1,1,'1',1,'1',21
1,11,5,2,1,'1',11,'11',21
1,21,6,3,1,'1',21,'21',21
2,2,7,1,2,'2',2,'2',22
2,12,8,2,2,'2',12,'12',22
2,22,9,3,2,'2',22,'22',22
3,3,10,1,3,'3',3,'3',23
3,13,11,2,3,'3',13,'13',23
3,23,12,3,3,'3',23,'23',23
4,4,13,1,4,'4',4,'4',24
4,14,14,2,4,'4',14,'14',24
4,24,15,3,4,'4',24,'24',24
5,5,16,1,5,'5',5,'5',15
5,15,17,2,5,'5',15,'15',15
6,6,18,1,6,'6',6,'6',16
6,16,19,2,6,'6',16,'16',16
7,7,20,1,7,'7',7,'7',17
7,17,21,2,7,'7',17,'17',17
8,8,22,1,8,'8',8,'8',18
8,18,23,2,8,'8',18,'18',18
9,9,24,1,9,'9',9,'9',19
9,19,25,2,9,'9',19,'19',19
---- TYPES
TINYINT, INT, BIGINT, BIGINT, INT, STRING, INT, STRING, INT
====
---- QUERY
select bool_col, id,
lag(id, 2, -12345) over (partition by bool_col order by id),
lead(id, 2, -12345) over (partition by bool_col order by id)
from alltypestiny
---- RESULTS: VERIFY_IS_EQUAL_SORTED
false,1,-12345,5
false,3,-12345,7
false,5,1,-12345
false,7,3,-12345
true,0,-12345,4
true,2,-12345,6
true,4,0,-12345
true,6,2,-12345
---- TYPES
BOOLEAN, INT, INT, INT
====
---- QUERY
# Tests rank() and dense_rank()
select year, month, bool_col, tinyint_col,
rank() over (partition by year, month order by bool_col, tinyint_col),
dense_rank() over (partition by year, month order by bool_col, tinyint_col)
from alltypes
where tinyint_col < 2 and month < 3 and (id % 100 < 10)
order by year, month, bool_col, tinyint_col
---- RESULTS
2009,1,false,1,1,1
2009,1,false,1,1,1
2009,1,false,1,1,1
2009,1,false,1,1,1
2009,1,true,0,5,2
2009,1,true,0,5,2
2009,1,true,0,5,2
2009,1,true,0,5,2
2009,2,false,1,1,1
2009,2,false,1,1,1
2009,2,true,0,3,2
2009,2,true,0,3,2
2010,1,false,1,1,1
2010,1,false,1,1,1
2010,1,false,1,1,1
2010,1,true,0,4,2
2010,1,true,0,4,2
2010,1,true,0,4,2
2010,2,false,1,1,1
2010,2,false,1,1,1
2010,2,false,1,1,1
2010,2,true,0,4,2
2010,2,true,0,4,2
2010,2,true,0,4,2
---- TYPES
INT, INT, BOOLEAN, TINYINT, BIGINT, BIGINT
====
---- QUERY
# analytics with inline views
select bool_col, a, b, a+b from
(select
bool_col,
count(int_col) over() a,
sum(int_col + bigint_col) over(partition by bool_col) b
from
(select * from functional.alltypes where id < 20) v1) v2
order by 1, 2, 3
---- RESULTS: VERIFY_IS_EQUAL_SORTED
false,20,550,570
false,20,550,570
false,20,550,570
false,20,550,570
false,20,550,570
false,20,550,570
false,20,550,570
false,20,550,570
false,20,550,570
false,20,550,570
true,20,440,460
true,20,440,460
true,20,440,460
true,20,440,460
true,20,440,460
true,20,440,460
true,20,440,460
true,20,440,460
true,20,440,460
true,20,440,460
---- TYPES
BOOLEAN, BIGINT, BIGINT, BIGINT
====
---- QUERY
# analytics and unions
select 1 a, min(id) over (partition by int_col)
from functional.alltypes where id < 5
union distinct
select 2, max(id) over (partition by bool_col)
from functional.alltypessmall where id >= 5 and id <= 10
union all
(select 3, sum(bigint_col) over (partition by int_col order by id)
from functional.alltypestiny where bool_col = false)
order by 1, 2 desc nulls first
---- RESULTS: VERIFY_IS_EQUAL_SORTED
1,4
1,3
1,2
1,1
1,0
2,10
2,9
3,40
3,30
3,20
3,10
---- TYPES
TINYINT, BIGINT
====
---- QUERY
# analytics in an uncorrelated subquery
select id, int_col, bool_col from functional.alltypestiny t1
where int_col in
(select min(bigint_col) over(partition by bool_col)
from functional.alltypessmall t2 where t2.id < 10)
---- RESULTS: VERIFY_IS_EQUAL_SORTED
0,0,true
2,0,true
4,0,true
6,0,true
---- TYPES
INT, INT, BOOLEAN
====
---- QUERY
# test conjunct evaluation in analytic nodes
select * from
(select bool_col, tinyint_col,
sum(tinyint_col) over (partition by bool_col order by tinyint_col
rows between unbounded preceding and 1 following) a
from alltypesagg where id <= 10 and day = 1) v
where
v.tinyint_col * 3 > v.a and
cast(v.a as boolean) = v.bool_col and
v.a is not null
---- RESULTS
true,8,20
---- TYPES
BOOLEAN, TINYINT, BIGINT
====
---- QUERY
# Test ROWS windows with start boundaries
select id,
count(id) over (order by id rows between 3 preceding and 3 preceding) c1,
count(id) over (order by id rows between 3 preceding and 2 preceding) c2,
count(id) over (order by id rows between 3 preceding and 1 preceding) c3,
count(id) over (order by id rows between 3 preceding and current row) c4,
count(id) over (order by id rows between 3 preceding and 1 following) c5,
count(id) over (order by id rows between 3 preceding and 2 following) c6,
count(id) over (order by id rows between 3 preceding and 3 following) c7,
count(id) over (order by id rows between 2 preceding and 3 following) c8,
count(id) over (order by id rows between 1 preceding and 3 following) c9,
count(id) over (order by id rows between current row and 3 following) c10,
count(id) over (order by id rows between 1 following and 3 following) c11,
count(id) over (order by id rows between 2 following and 3 following) c12,
count(id) over (order by id rows between 3 following and 3 following) c13
from alltypes where id < 8
---- RESULTS: VERIFY_IS_EQUAL_SORTED
0,0,0,0,1,2,3,4,4,4,4,3,2,1
1,0,0,1,2,3,4,5,5,5,4,3,2,1
2,0,1,2,3,4,5,6,6,5,4,3,2,1
3,1,2,3,4,5,6,7,6,5,4,3,2,1
4,1,2,3,4,5,6,7,6,5,4,3,2,1
5,1,2,3,4,5,6,6,5,4,3,2,1,0
6,1,2,3,4,5,5,5,4,3,2,1,0,0
7,1,2,3,4,4,4,4,3,2,1,0,0,0
---- TYPES
INT, BIGINT, BIGINT, BIGINT, BIGINT, BIGINT, BIGINT, BIGINT, BIGINT, BIGINT, BIGINT, BIGINT, BIGINT, BIGINT
====
---- QUERY
# Test sum() and avg() removing values
select id,
sum(int_col) over (order by id rows between 1 preceding and 1 following),
sum(double_col) over (order by id rows between 3 preceding and 2 preceding),
avg(int_col) over (order by id rows between 1 preceding and 1 following),
avg(double_col) over (order by id rows between 3 preceding and 2 preceding),
avg(timestamp_col) over (order by id rows between 2 following and 3 following)
from alltypes where id < 8
---- RESULTS: VERIFY_IS_EQUAL_SORTED
0,1,NULL,0.5,NULL,2009-01-01 00:02:30.199999809
1,3,NULL,1,NULL,2009-01-01 00:03:30.449999809
2,6,0,2,0,2009-01-01 00:04:30.349999904
3,9,10.1,3,5.05,2009-01-01 00:05:30.124999999
4,12,30.3,4,15.15,2009-01-01 00:06:30.180000066
5,15,50.49999999999999,5,25.25,2009-01-01 00:07:00.210000038
6,18,70.69999999999999,6,35.34999999999999,NULL
7,13,90.89999999999999,6.5,45.45,NULL
---- DBAPI_RESULTS: VERIFY_IS_EQUAL_SORTED
0,1,NULL,0.5,NULL,2009-01-01 00:02:30.199999
1,3,NULL,1,NULL,2009-01-01 00:03:30.449999
2,6,0,2,0,2009-01-01 00:04:30.349999
3,9,10.1,3,5.05,2009-01-01 00:05:30.124999
4,12,30.3,4,15.15,2009-01-01 00:06:30.180000
5,15,50.49999999999999,5,25.25,2009-01-01 00:07:00.210000
6,18,70.69999999999999,6,35.34999999999999,NULL
7,13,90.89999999999999,6.5,45.45,NULL
---- TYPES
INT, BIGINT, DOUBLE, DOUBLE, DOUBLE, TIMESTAMP
====
---- QUERY
# More testing of start bounds. This exposed a bug in removing
# values from the window after the partition.
select tinyint_col, int_col,
sum(int_col) over (partition by tinyint_col order by int_col nulls last rows between 2 following and 3 following),
sum(int_col) over (partition by tinyint_col order by int_col nulls last rows between 2 following and 10 following),
sum(int_col) over (partition by tinyint_col order by int_col nulls last rows between 6 following and 100 following)
from alltypesagg
where (tinyint_col is NULL or tinyint_col < 3) and id < 60;
---- RESULTS: VERIFY_IS_EQUAL_SORTED
NULL,10,40,280,180
NULL,10,50,260,140
NULL,20,60,240,100
NULL,20,70,210,50
NULL,30,80,180,NULL
NULL,30,90,140,NULL
NULL,40,100,100,NULL
NULL,40,50,50,NULL
NULL,50,NULL,NULL,NULL
NULL,50,NULL,NULL,NULL
NULL,NULL,NULL,NULL,NULL
NULL,NULL,NULL,NULL,NULL
1,1,52,144,NULL
1,11,72,123,NULL
1,21,92,92,NULL
1,31,51,51,NULL
1,41,NULL,NULL,NULL
1,51,NULL,NULL,NULL
2,2,54,148,NULL
2,12,74,126,NULL
2,22,94,94,NULL
2,32,52,52,NULL
2,42,NULL,NULL,NULL
2,52,NULL,NULL,NULL
---- TYPES
TINYINT, INT, BIGINT, BIGINT, BIGINT
====
---- QUERY
# IMPALA-1273: Incorrect results with very large window and small table
select
count(int_col) over (order by bigint_col rows between 95 preceding and 40 following)
from alltypestiny
---- RESULTS
8
8
8
8
8
8
8
8
---- TYPES
BIGINT
====
---- QUERY
select count(distinct t1.c1) from
(select count(id) over (order by id rows between 550 preceding and 550 following) c1
from alltypesagg) t1
---- RESULTS
551
---- TYPES
BIGINT
====
---- QUERY
select count(distinct t1.c1) from
(select count(id) over (order by id rows between 1050 preceding and 2 following) c1
from alltypesagg) t1
---- RESULTS
1051
---- TYPES
BIGINT
====
---- QUERY
select count(distinct t1.c1) from
(select count(id) over (order by id rows between 1050 preceding and 500 preceding) c1
from alltypesagg) t1
---- RESULTS
552
---- TYPES
BIGINT
====
---- QUERY
select count(distinct t1.c1) from
(select count(id) over (order by id rows between 2 following and 1050 following) c1
from alltypesagg) t1
---- RESULTS
1050
---- TYPES
BIGINT
====
---- QUERY
select count(distinct t1.c1) from
(select count(id) over (order by id rows between 500 following and 1050 following) c1
from alltypesagg) t1
---- RESULTS
552
---- TYPES
BIGINT
====
---- QUERY
select (tinyint_col % 4), id,
count(id) over (partition by (tinyint_col % 4) order by id
rows between 3 following and 10 following)
from alltypes where id < 20
order by (tinyint_col % 4), id
---- RESULTS
0,0,3
0,4,2
0,8,1
0,10,0
0,14,0
0,18,0
1,1,3
1,5,2
1,9,1
1,11,0
1,15,0
1,19,0
2,2,1
2,6,0
2,12,0
2,16,0
3,3,1
3,7,0
3,13,0
3,17,0
---- TYPES
TINYINT, INT, BIGINT
====
---- QUERY
# Test IMPALA-1269: ROWS windows with offset to current row might not remove
# correct values after new partitions
select sum(t2.int_col_1) from
(select SUM(t1.int_col) OVER (PARTITION BY t1.smallint_col ORDER BY t1.day, t1.int_col
ROWS 7 PRECEDING) AS int_col_1
FROM alltypesagg t1 ) t2
---- RESULTS
41790600
---- TYPES
BIGINT
====
---- QUERY
# Test FIRST_VALUE with strings and null handling
select cast(floor(id/3) as int), tinyint_col,
first_value(tinyint_col) over (partition by cast(floor(id/3) as int) order by tinyint_col nulls first),
first_value(cast(tinyint_col as string)) over (partition by cast(floor(id/3) as int) order by cast(tinyint_col as string) nulls first),
last_value(tinyint_col) over (partition by cast(floor(id/3) as int) order by tinyint_col nulls first),
last_value(cast(tinyint_col as string)) over (partition by cast(floor(id/3) as int) order by cast(tinyint_col as string) nulls first)
from alltypesagg where id < 50 and (tinyint_col is null or tinyint_col < 4)
---- RESULTS: VERIFY_IS_EQUAL_SORTED
0,NULL,NULL,'NULL',NULL,'NULL'
0,NULL,NULL,'NULL',NULL,'NULL'
0,1,NULL,'NULL',1,'1'
0,2,NULL,'NULL',2,'2'
1,3,3,'3',3,'3'
3,NULL,NULL,'NULL',NULL,'NULL'
3,NULL,NULL,'NULL',NULL,'NULL'
3,1,NULL,'NULL',1,'1'
4,2,2,'2',2,'2'
4,3,2,'2',3,'3'
6,NULL,NULL,'NULL',NULL,'NULL'
6,NULL,NULL,'NULL',NULL,'NULL'
7,1,1,'1',1,'1'
7,2,1,'1',2,'2'
7,3,1,'1',3,'3'
10,NULL,NULL,'NULL',NULL,'NULL'
10,NULL,NULL,'NULL',NULL,'NULL'
10,1,NULL,'NULL',1,'1'
10,2,NULL,'NULL',2,'2'
11,3,3,'3',3,'3'
13,NULL,NULL,'NULL',NULL,'NULL'
13,NULL,NULL,'NULL',NULL,'NULL'
13,1,NULL,'NULL',1,'1'
14,2,2,'2',2,'2'
14,3,2,'2',3,'3'
---- TYPES
INT, TINYINT, TINYINT, STRING, TINYINT, STRING
====
---- QUERY
# IMPALA-1302: Incorrect result of FIRST_VALUE query.
select
first_value(id) over (order by id rows between 3 preceding and 3 following),
first_value(id) over (order by id rows between 3 preceding and 2 following),
first_value(id) over (order by id rows between 3 preceding and 1 following),
first_value(id) over (order by id rows between 3 preceding and current row),
first_value(id) over (order by id rows between 3 preceding and 1 preceding),
first_value(id) over (order by id rows between 3 preceding and 2 preceding),
first_value(id) over (order by id rows between 3 preceding and 3 preceding),
first_value(id) over (order by id rows between 2 preceding and 3 following),
first_value(id) over (order by id rows between 1 preceding and 3 following),
first_value(id) over (order by id rows between current row and 3 following),
first_value(id) over (order by id rows between 1 following and 3 following),
first_value(id) over (order by id rows between 2 following and 3 following),
first_value(id) over (order by id rows between 3 following and 3 following),
first_value(id) over (order by id rows between 15 preceding and 15 preceding),
first_value(id) over (order by id rows between 15 preceding and 15 following),
first_value(id) over (order by id rows between 15 preceding and 6 preceding),
first_value(id) over (order by id rows between 6 preceding and 15 following),
first_value(id) over (order by id rows between 15 following and 15 following),
first_value(id) over (order by id rows between unbounded preceding and 8 preceding),
last_value(id) over (order by id rows between 3 preceding and 3 following),
last_value(id) over (order by id rows between 3 preceding and 2 following),
last_value(id) over (order by id rows between 3 preceding and 1 following),
last_value(id) over (order by id rows between 3 preceding and current row),
last_value(id) over (order by id rows between 3 preceding and 1 preceding),
last_value(id) over (order by id rows between 3 preceding and 2 preceding),
last_value(id) over (order by id rows between 3 preceding and 3 preceding),
last_value(id) over (order by id rows between 2 preceding and 3 following),
last_value(id) over (order by id rows between 1 preceding and 3 following),
last_value(id) over (order by id rows between current row and 3 following),
last_value(id) over (order by id rows between 1 following and 3 following),
last_value(id) over (order by id rows between 2 following and 3 following),
last_value(id) over (order by id rows between 3 following and 3 following),
last_value(id) over (order by id rows between 15 preceding and 15 preceding),
last_value(id) over (order by id rows between 15 preceding and 15 following),
last_value(id) over (order by id rows between 15 preceding and 6 preceding),
last_value(id) over (order by id rows between 6 preceding and 15 following),
last_value(id) over (order by id rows between 15 following and 15 following),
last_value(id) over (order by id rows between 8 following and unbounded following)
from alltypes where id < 10 order by id;
---- RESULTS
0,0,0,0,NULL,NULL,NULL,0,0,0,1,2,3,NULL,0,NULL,0,NULL,NULL,3,2,1,0,NULL,NULL,NULL,3,3,3,3,3,3,NULL,9,NULL,9,NULL,9
0,0,0,0,0,NULL,NULL,0,0,1,2,3,4,NULL,0,NULL,0,NULL,NULL,4,3,2,1,0,NULL,NULL,4,4,4,4,4,4,NULL,9,NULL,9,NULL,9
0,0,0,0,0,0,NULL,0,1,2,3,4,5,NULL,0,NULL,0,NULL,NULL,5,4,3,2,1,0,NULL,5,5,5,5,5,5,NULL,9,NULL,9,NULL,NULL
0,0,0,0,0,0,0,1,2,3,4,5,6,NULL,0,NULL,0,NULL,NULL,6,5,4,3,2,1,0,6,6,6,6,6,6,NULL,9,NULL,9,NULL,NULL
1,1,1,1,1,1,1,2,3,4,5,6,7,NULL,0,NULL,0,NULL,NULL,7,6,5,4,3,2,1,7,7,7,7,7,7,NULL,9,NULL,9,NULL,NULL
2,2,2,2,2,2,2,3,4,5,6,7,8,NULL,0,NULL,0,NULL,NULL,8,7,6,5,4,3,2,8,8,8,8,8,8,NULL,9,NULL,9,NULL,NULL
3,3,3,3,3,3,3,4,5,6,7,8,9,NULL,0,0,0,NULL,NULL,9,8,7,6,5,4,3,9,9,9,9,9,9,NULL,9,0,9,NULL,NULL
4,4,4,4,4,4,4,5,6,7,8,9,NULL,NULL,0,0,1,NULL,NULL,9,9,8,7,6,5,4,9,9,9,9,9,NULL,NULL,9,1,9,NULL,NULL
5,5,5,5,5,5,5,6,7,8,9,NULL,NULL,NULL,0,0,2,NULL,0,9,9,9,8,7,6,5,9,9,9,9,NULL,NULL,NULL,9,2,9,NULL,NULL
6,6,6,6,6,6,6,7,8,9,NULL,NULL,NULL,NULL,0,0,3,NULL,0,9,9,9,9,8,7,6,9,9,9,NULL,NULL,NULL,NULL,9,3,9,NULL,NULL
---- TYPES
INT, INT, INT, INT, INT, INT, INT, INT, INT, INT, INT, INT, INT, INT, INT, INT, INT, INT, INT, INT, INT, INT, INT, INT, INT, INT, INT, INT, INT, INT, INT, INT, INT, INT, INT, INT, INT, INT
====
---- QUERY
# Test RANGE with UNBOUNDED FOLLOWING
select id,
sum(id) over (order by id range between unbounded preceding and unbounded following)
from alltypesagg where id <= 30 order by id;
---- RESULTS
0,525
0,525
1,525
2,525
3,525
4,525
5,525
6,525
7,525
8,525
9,525
10,525
10,525
11,525
12,525
13,525
14,525
15,525
16,525
17,525
18,525
19,525
20,525
20,525
21,525
22,525
23,525
24,525
25,525
26,525
27,525
28,525
29,525
30,525
30,525
---- TYPES
INT, BIGINT
====
---- QUERY
# IMPALA-1293: DCHECK failure with window ROWS BETWEEN UNBOUNDED PRECEDING
# AND <large> PRECEDING
select count(*), count(t1.c1) from
(select SUM(tinyint_col) OVER (PARTITION BY bigint_col ORDER BY tinyint_col ASC,
id ASC, timestamp_col ASC, day ASC
ROWS BETWEEN UNBOUNDED PRECEDING AND 11 PRECEDING) c1
FROM alltypesagg) t1
---- RESULTS
11000,0
---- TYPES
BIGINT, BIGINT
====
---- QUERY
# Test RANGE with UNBOUNDED FOLLOWING on many partitions
select count(distinct t1.c1) from
(select sum(id) over (partition by day order by tinyint_col
range between unbounded preceding and unbounded following) c1
from alltypesagg) t1;
---- RESULTS
11
---- TYPES
BIGINT
====
---- QUERY
# IMPALA-1296: Fix DCHECK when unnecessary buffered tuple (for sum() analytic) not null
select
sum(month) over (),
dense_rank() over (order by date_string_col)
from alltypes where id < 5
---- RESULTS
5,1
5,1
5,1
5,1
5,1
---- TYPES
BIGINT, BIGINT
====
---- QUERY
# IMPALA-1292: Incorrect result in analytic SUM when ORDER BY column is null
select tinyint_col, id,
SUM(id) OVER (ORDER BY tinyint_col ASC, id ASC)
FROM alltypesagg
where (tinyint_col is NULL or tinyint_col < 2) and id < 100 order by 1, 2
---- RESULTS
1,1,1
1,11,12
1,21,33
1,31,64
1,41,105
1,51,156
1,61,217
1,71,288
1,81,369
1,91,460
NULL,0,460
NULL,0,460
NULL,10,480
NULL,10,480
NULL,20,520
NULL,20,520
NULL,30,580
NULL,30,580
NULL,40,660
NULL,40,660
NULL,50,760
NULL,50,760
NULL,60,880
NULL,60,880
NULL,70,1020
NULL,70,1020
NULL,80,1180
NULL,80,1180
NULL,90,1360
NULL,90,1360
---- TYPES
TINYINT, INT, BIGINT
====
---- QUERY
# IMPALA-1280: Crash running analytic with LEFT SEMI JOIN
select sum(t1.int_col) over (partition by t1.id order by t1.int_col, t1.month)
from alltypestiny t1
where exists (select tt1.month from alltypes tt1 where t1.int_col = tt1.smallint_col)
---- RESULTS: VERIFY_IS_EQUAL_SORTED
0
0
0
1
0
1
1
1
---- TYPES
BIGINT
====
---- QUERY
# Regression test for IMPALA-1223. Query with analytic query with right join.
# Need proper handling of NULLs.
SELECT SUM(t1.int_col) OVER () AS int_col_1 FROM alltypestiny t1
RIGHT JOIN alltypestiny t2 ON t2.tinyint_col = t1.bigint_col AND t2.id = t1.month;
---- RESULTS
0
0
0
0
0
0
0
0
---- TYPES
BIGINT
====
---- QUERY
## IMPALA-1312: Fix DCHECK failure in BufferedTupleStream
## Disabled due to slow execution without codegen, breaks exhaustive runs (IMPALA-1414)
#SELECT
# SUM(t1.int_col * t1.id) OVER (
# ORDER BY t1.int_col * t1.id ASC, t3.year + t3.tinyint_col ASC ROWS
# BETWEEN 66 PRECEDING AND 21 FOLLOWING) AS int_col_1
#FROM alltypes t1
#INNER JOIN alltypes t2 ON t2.month = t1.smallint_col
#INNER JOIN alltypes t3 ON t3.bigint_col = t2.bigint_col
#WHERE
#t2.id <= t3.smallint_col + t2.tinyint_col
#order by 1
#limit 5
#---- TYPES
#BIGINT
#---- RESULTS
#22
#23
#24
#25
#26
#====
#----QUERY
# Testing crashes in buffered tuple stream related to IMPALA-1306.
SELECT AVG(t1.int_col) OVER () AS double_col_1
FROM alltypestiny t1
WHERE EXISTS
(SELECT t1.month AS int_col_1
FROM alltypestiny t1)
---- RESULTS
0.5
0.5
0.5
0.5
0.5
0.5
0.5
0.5
---- TYPES
DOUBLE
====
---- QUERY
# IMPALA-1808: handle partition/order by exprs with NaN
select id / int_col,
count(*) over (partition by id / int_col order by id),
count(*) over (order by id / int_col),
count(*) over (order by id / int_col rows between 100 preceding and 90 preceding),
count(*) over (order by id / int_col rows between 2 preceding and 1 preceding),
count(*) over (order by id / int_col rows between 1 preceding and 1 following),
count(*) over (order by id / int_col rows between 1 following and 2 following),
count(*) over (order by id / int_col rows between 90 following and 100 following)
from alltypestiny
---- RESULTS
NaN,1,1,0,0,2,2,0
1,1,2,0,1,3,2,0
3,1,3,0,2,3,2,0
5,1,4,0,2,3,2,0
7,1,5,0,2,3,2,0
Infinity,1,8,0,2,3,2,0
Infinity,2,8,0,2,3,1,0
Infinity,3,8,0,2,2,0,0
---- TYPES
DOUBLE, BIGINT, BIGINT, BIGINT, BIGINT, BIGINT, BIGINT, BIGINT
====
----QUERY
# IMPALA-1483: Check that TupleIsNullPredicates referring to the logical
# analytic output are substituted with TupleIsNullPredicates referring to
# the physical output.
select t2.a, t2.id, t2.int_col
from functional.alltypes t1
left outer join
(select sum(int_col) over(order by id) a, id, int_col is null as int_col
from functional.alltypes
where year = 2009 and month between 1 and 2) t2
on (t1.id = t2.id)
where year = 2009 and month = 1
order by t2.id limit 10
---- RESULTS
0,0,false
1,1,false
3,2,false
6,3,false
10,4,false
15,5,false
21,6,false
28,7,false
36,8,false
45,9,false
---- TYPES
BIGINT, INT, BOOLEAN
====
---- QUERY
# IMPALA-1542: Analytic fn containing arithmetic expr on NULL crashed
select lag(coalesce(505, 1 + NULL), 1) over (order by int_col desc)
from functional.alltypestiny
---- RESULTS
NULL
505
505
505
505
505
505
505
---- TYPES
SMALLINT
====
---- QUERY
# IMPALA-1562: AnalyticEvalNode not properly handling nullable tuple
select max(t3.c1) from
(select min(t2.string_col) over () c1
from alltypesagg t1
left join alltypes t2 on t2.string_col = t1.string_col ) t3
---- RESULTS
'0'
---- TYPES
STRING
====
---- QUERY
# IMPALA-1559: FIRST_VALUE rewrite function intermediate type not matching slot type
select
first_value(-32.9) over (order by id rows between 92 preceding and unbounded following),
first_value(1.1) over (order by id rows between 92 preceding and 1 preceding)
from alltypestiny
---- RESULTS: VERIFY_IS_EQUAL_SORTED
-32.9,NULL
-32.9,1.1
-32.9,1.1
-32.9,1.1
-32.9,1.1
-32.9,1.1
-32.9,1.1
-32.9,1.1
---- TYPES
DECIMAL, DECIMAL
====
---- QUERY
# IMPALA-1888: FIRST_VALUE produces incorrect results when the window
# completely precedes all rows in a partition.
select id, date_string_col,
first_value(id) over (partition by date_string_col order by id
rows between 10 preceding and 3 preceding)
from alltypes where id < 15 order by id
---- RESULTS: VERIFY_IS_EQUAL_SORTED
0,'01/01/09',NULL
1,'01/01/09',NULL
2,'01/01/09',NULL
3,'01/01/09',0
4,'01/01/09',0
5,'01/01/09',0
6,'01/01/09',0
7,'01/01/09',0
8,'01/01/09',0
9,'01/01/09',0
10,'01/02/09',NULL
11,'01/02/09',NULL
12,'01/02/09',NULL
13,'01/02/09',10
14,'01/02/09',10
---- TYPES
INT, STRING, INT
====
---- QUERY
# IMPALA-1519: Check that the first analytic sort of a select block
# materializes TupleIsNullPredicates to be substituted in ancestor nodes.
select
sum(t1.id) over (partition by t1.bool_col),
count(1) over (order by t1.int_col),
avg(g) over (order by f),
t2.a,
t2.d
from functional.alltypestiny t1
left outer join
(select
id as a,
coalesce(id, 10) as b,
int_col as c,
coalesce(int_col, 20) as d,
bigint_col e,
coalesce(bigint_col, 30) as f,
coalesce(id + bigint_col, 40) as g
from functional.alltypestiny) t2
on (t1.id = t2.a + 100)
---- RESULTS: VERIFY_IS_EQUAL_SORTED
12,4,NULL,NULL,NULL
12,4,NULL,NULL,NULL
12,4,NULL,NULL,NULL
12,4,NULL,NULL,NULL
16,8,NULL,NULL,NULL
16,8,NULL,NULL,NULL
16,8,NULL,NULL,NULL
16,8,NULL,NULL,NULL
---- TYPES
BIGINT, BIGINT, DOUBLE, INT, INT
====
---- QUERY
# IMPALA-1519: Check that the first analytic sort of a select block
# materializes TupleIsNullPredicates to be substituted in ancestor nodes.
# Same test as the one above, but with an aggregation on top.
select avg(af1), sum(af3), count(a)
from
(select
sum(t1.id) over (partition by t1.bool_col) af1,
count(1) over (order by t1.int_col) af2,
avg(g) over (order by f) af3,
t2.a,
t2.d
from functional.alltypestiny t1
left outer join
(select
id as a,
coalesce(id, 10) as b,
int_col as c,
coalesce(int_col, 20) as d,
bigint_col e,
coalesce(bigint_col, 30) as f,
coalesce(id + bigint_col, 40) as g
from functional.alltypestiny) t2
on (t1.id = t2.a + 100)) t3
group by d
---- RESULTS: VERIFY_IS_EQUAL_SORTED
14,NULL,0
---- TYPES
DOUBLE, DOUBLE, BIGINT
====
---- QUERY
# IMPALA-1519: Check that expr wrapping with a TupleIsNullPredicate
# is performed correctly with analytics and multiple nesting levels.
# The test covers these cases:
# - analytic output that needs to be wrapped
# - analytic input that needs to be wrapped
# - both cases above with and without order by in the over() clause
# - exprs that need to be wrapped multiple times due to multiple outer joins
select * from
(select
a.id,
sum(x) over (partition by a.id) as x,
ifnull(y, 10) as y,
ifnull(z, "b") as z
from functional.alltypestiny a
left outer join
(select id,
ifnull(int_col, 1) x,
count(bigint_col) over(partition by id) y,
ifnull(string_col, "a") z
from functional.alltypestiny b) v1
on (a.id = v1.id)) v2
full outer join
(select
c.id,
sum(x) over (order by c.id) as x,
ifnull(y, 10) as y,
ifnull(z, "b") as z
from functional.alltypestiny c
left outer join
(select id,
ifnull(int_col, 1) x,
count(bigint_col) over(order by id) y,
ifnull(string_col, "a") z
from functional.alltypestiny d) v3
on (c.id = v3.id)) v4
on (v2.id = v4.id)
---- RESULTS: VERIFY_IS_EQUAL_SORTED
0,0,1,'0',0,0,1,'0'
1,1,1,'1',1,1,2,'1'
2,0,1,'0',2,1,3,'0'
3,1,1,'1',3,2,4,'1'
4,0,1,'0',4,2,5,'0'
5,1,1,'1',5,3,6,'1'
6,0,1,'0',6,3,7,'0'
7,1,1,'1',7,4,8,'1'
---- TYPES
INT, BIGINT, BIGINT, STRING, INT, BIGINT, BIGINT, STRING
====
---- QUERY
# IMPALA-1519: Check that expr wrapping with a TupleIsNullPredicate
# is performed correctly with analytics and multiple nesting levels.
# This is a tricky case from IMPALA-1518 which has a non-materialized
# analytic in a WITH-clause.
with w as
(select coalesce(-668, -335, count(smallint_col)) as int_col,
lead('670', 87) over
(order by coalesce(-668, -335, count(smallint_col)) desc) as char_col
from functional.alltypes)
select
lead(-410, 11) over
(order by (t2.int_col) + (t2.int_col) desc,
greatest(coalesce(t2.float_col, 16.51),
coalesce(t2.int_col, 997)) desc) as int_col,
(coalesce(177, -593, 723)) + (lag(227, 43) over
(order by (t2.int_col) + (t2.int_col) desc,
greatest(coalesce(t2.float_col, 16.51),
coalesce(t2.int_col, 997)) desc)) as int_col_2,
(t2.int_col) + (t2.int_col) as int_col_3,
greatest(coalesce(t2.float_col, 16.51), coalesce(t2.int_col, 997)) as float_col,
coalesce(t1.int_col, t1.int_col, t2.int_col) as int_col_4
from w t1
right outer join
(select (t1.month) + (t1.year) as int_col,
least(coalesce(t1.float_col, 683.98), -213.01) as float_col
from functional.alltypestiny t1
where (t1.int_col) not in (t1.smallint_col, t1.smallint_col)) t2
on (t2.int_col = t1.int_col and t2.int_col = t1.int_col and t2.int_col = t1.int_col)
---- RESULTS
---- TYPES
SMALLINT, INT, BIGINT, DOUBLE, BIGINT
====
---- QUERY
# IMPALA-1946: Check that On-clause predicates of an outer join assigned in a scan
# are not wrapped in TupleIsNullPredicates.
select /* +straight_join */ a.id, b.id
from functional.alltypestiny a
left outer join
(select t1.id, ifnull(t1.int_col, 10) as int_col
from functional.alltypestiny t1
inner join functional.alltypestiny t2
on (t1.id = t2.id)) b
on (a.id = b.id and b.int_col < 10)
---- RESULTS
0,0
1,1
2,2
3,3
4,4
5,5
6,6
7,7
---- TYPES
INT, INT
====
---- QUERY
# Test percent_rank() with multiple partition exprs and multiple order by exprs and
# compare with the rewrite target query
select t.year, t.month, t.id, t.pr, (t.r-1)/(t.c-1)
from (select year, month, id, rank() over (partition by year, month order by id) as r,
count() over(partition by year, month) as c,
percent_rank() over(partition by year, month order by id) as pr
from functional.alltypes where tinyint_col < 2 and month < 3 and (id % 100 < 10)) as t
order by t.year, t.month, t.id;
---- RESULTS
2009,1,0,0,0
2009,1,1,0.1428571428571428,0.1428571428571428
2009,1,100,0.2857142857142857,0.2857142857142857
2009,1,101,0.4285714285714285,0.4285714285714285
2009,1,200,0.5714285714285714,0.5714285714285714
2009,1,201,0.7142857142857143,0.7142857142857143
2009,1,300,0.8571428571428571,0.8571428571428571
2009,1,301,1,1
2009,2,400,0,0
2009,2,401,0.3333333333333333,0.3333333333333333
2009,2,500,0.6666666666666666,0.6666666666666666
2009,2,501,1,1
2010,1,3700,0,0
2010,1,3701,0.2,0.2
2010,1,3800,0.4,0.4
2010,1,3801,0.6,0.6
2010,1,3900,0.8,0.8
2010,1,3901,1,1
2010,2,4000,0,0
2010,2,4001,0.2,0.2
2010,2,4100,0.4,0.4
2010,2,4101,0.6,0.6
2010,2,4200,0.8,0.8
2010,2,4201,1,1
---- TYPES
INT, INT, INT, DOUBLE, DOUBLE
====
---- QUERY
# Test cume_dist() with multiple partition exprs and multiple order by exprs
# and compare with the rewrite query
select t.year, t.month, t.id, t.cd, ((t.c-t.r)+1)/t.c
from (select year, month, id,
rank() over (partition by year, month order by id desc) as r,
count() over(partition by year, month) as c,
cume_dist() over(partition by year, month order by id) as cd
from functional.alltypes where tinyint_col < 2 and month < 3 and (id % 100 < 10)) as t
order by t.year, t.month, t.id;
---- RESULTS
2009,1,0,0.125,0.125
2009,1,1,0.25,0.25
2009,1,100,0.375,0.375
2009,1,101,0.5,0.5
2009,1,200,0.625,0.625
2009,1,201,0.75,0.75
2009,1,300,0.875,0.875
2009,1,301,1,1
2009,2,400,0.25,0.25
2009,2,401,0.5,0.5
2009,2,500,0.75,0.75
2009,2,501,1,1
2010,1,3700,0.1666666666666667,0.1666666666666667
2010,1,3701,0.3333333333333333,0.3333333333333333
2010,1,3800,0.5,0.5
2010,1,3801,0.6666666666666666,0.6666666666666666
2010,1,3900,0.8333333333333334,0.8333333333333334
2010,1,3901,1,1
2010,2,4000,0.1666666666666667,0.1666666666666667
2010,2,4001,0.3333333333333333,0.3333333333333333
2010,2,4100,0.5,0.5
2010,2,4101,0.6666666666666666,0.6666666666666666
2010,2,4200,0.8333333333333334,0.8333333333333334
2010,2,4201,1,1
---- TYPES
INT, INT, INT, DOUBLE, DOUBLE
====
---- QUERY
# Test ntile() with multiple partition exprs and multiple order by exprs and compare
# with the rewrite query
select t.year, t.month, t.id, t.nt, floor((if(t.c>3, 3, t.c)*(t.rownum-1))/t.c) + 1
from (select year, month, id,
row_number() over (partition by year, month order by id) as rownum,
count() over(partition by year, month) as c,
ntile(3) over(partition by year, month order by id) as nt
from functional.alltypes where tinyint_col < 2 and month < 3 and (id % 100 < 10)) as t
order by t.year, t.month, t.id;
---- RESULTS
2009,1,0,1,1
2009,1,1,1,1
2009,1,100,1,1
2009,1,101,2,2
2009,1,200,2,2
2009,1,201,2,2
2009,1,300,3,3
2009,1,301,3,3
2009,2,400,1,1
2009,2,401,1,1
2009,2,500,2,2
2009,2,501,3,3
2010,1,3700,1,1
2010,1,3701,1,1
2010,1,3800,2,2
2010,1,3801,2,2
2010,1,3900,3,3
2010,1,3901,3,3
2010,2,4000,1,1
2010,2,4001,1,1
2010,2,4100,2,2
2010,2,4101,2,2
2010,2,4200,3,3
2010,2,4201,3,3
---- TYPES
INT, INT, INT, BIGINT, DOUBLE
====
---- QUERY
# Test for ntile when the argument is larger than number of rows
select year, month, id, ntile(1000) over(order by year, month, id) from alltypes where
tinyint_col < 2 and month < 3 and (id % 100 < 10) order by year, month, id
---- RESULTS
2009,1,0,1
2009,1,1,2
2009,1,100,3
2009,1,101,4
2009,1,200,5
2009,1,201,6
2009,1,300,7
2009,1,301,8
2009,2,400,9
2009,2,401,10
2009,2,500,11
2009,2,501,12
2010,1,3700,13
2010,1,3701,14
2010,1,3800,15
2010,1,3801,16
2010,1,3900,17
2010,1,3901,18
2010,2,4000,19
2010,2,4001,20
2010,2,4100,21
2010,2,4101,22
2010,2,4200,23
2010,2,4201,24
---- TYPES
INT, INT, INT, BIGINT
====
---- QUERY
# percent_rank() and cume_dist() with inline views
select bool_col, a, b, a + b from
(select
bool_col,
percent_rank() over(order by id) a,
cume_dist() over(partition by bool_col order by id) b
from
(select * from functional.alltypes where id < 20) v1) v2
order by 1, 2, 3
---- RESULTS: VERIFY_IS_EQUAL_SORTED
false,0.05263157894736842,0.1,0.1526315789473684
false,0.1578947368421053,0.2,0.3578947368421053
false,0.2631578947368421,0.3,0.5631578947368421
false,0.3684210526315789,0.4,0.7684210526315789
false,0.4736842105263158,0.5,0.9736842105263157
false,0.5789473684210527,0.6,1.178947368421053
false,0.6842105263157895,0.7,1.38421052631579
false,0.7894736842105263,0.8,1.589473684210526
false,0.8947368421052632,0.9,1.794736842105263
false,1,1,2
true,0,0.1,0.1
true,0.1052631578947368,0.2,0.3052631578947368
true,0.2105263157894737,0.3,0.5105263157894737
true,0.3157894736842105,0.4,0.7157894736842105
true,0.4210526315789473,0.5,0.9210526315789473
true,0.5263157894736842,0.6,1.126315789473684
true,0.631578947368421,0.7,1.331578947368421
true,0.7368421052631579,0.8,1.536842105263158
true,0.8421052631578947,0.9,1.742105263157895
true,0.9473684210526315,1,1.947368421052631
---- TYPES
BOOLEAN, DOUBLE, DOUBLE, DOUBLE
====
---- QUERY
select 1 a, cume_dist() over (order by int_col) as cd from alltypes where id < 5
union
distinct (select 2, percent_rank() over (partition by bool_col order by int_col)
from alltypesagg where id >= 5 and id <= 10) order by a, cd;
---- RESULTS
1,0.2
1,0.4
1,0.6
1,0.8
1,1
2,0
2,0.333333333333333
2,0.5
2,0.666666666666666
2,1
---- TYPES
TINYINT, DOUBLE
====
---- QUERY
# Test analytic function rewrite when the same expr appears twice
select id, percent_rank() over(order by id) from functional.alltypestiny
order by percent_rank() over (order by id)
---- RESULTS
0,0
1,0.1428571428571428
2,0.2857142857142857
3,0.4285714285714285
4,0.5714285714285714
5,0.7142857142857143
6,0.8571428571428571
7,1
---- TYPES
INT, DOUBLE
====
---- QUERY
# Tests IMPALA-2158. Check that analytic functions with a limit succeed.
select id, rank() over(order by id) from functional.alltypesagg limit 10
---- RESULTS
0,1
0,1
1,3
2,4
3,5
4,6
5,7
6,8
7,9
8,10
---- TYPES
INT, BIGINT
====
---- QUERY
# Regression test for IMPALA-2829: MemPool mem transfer to the output rowbatch
# may result in a crash when the mem transfer occurs right before eos. This
# repro was crafted to hit the crash by playing with the window and row sizes
# so that memory allocated within the analytic node is just large enough (>8mb)
# to be transfered to the output row batch immediately before eos.
select max(t3.c1), max(t3.c2)
from (
select
avg( t1.timestamp_col )
over (order by t1.id, t2.id rows between 5000 following and 50000 following) c1,
avg( t2.timestamp_col )
over (order by t1.id, t2.id rows between 5000 following and 50000 following) c2
from alltypesagg t1 join alltypesagg t2 where t1.int_col = t2.int_col
) t3
---- RESULTS
2010-01-10 18:02:05.234931468,2010-01-10 18:02:05.215156078
---- DBAPI_RESULTS
2010-01-10 18:02:05.234931,2010-01-10 18:02:05.215156
---- TYPES
TIMESTAMP, TIMESTAMP
====
---- QUERY
# IMPALA-2457 - PERCENT_RANK() returns NaN for row group with 1 row.
select id, rank() over (partition by id order by tinyint_col),
count() over (partition by id), percent_rank() over (partition by id order by tinyint_col)
from functional.alltypestiny order by id
---- RESULTS
0,1,1,0
1,1,1,0
2,1,1,0
3,1,1,0
4,1,1,0
5,1,1,0
6,1,1,0
7,1,1,0
---- TYPES
int, bigint, bigint, double
====
---- QUERY
# Check that the a large analytic query can succeed with the minimum buffers (3 buffers
# for sort, 2 buffer for analytic, plus 32KB extra for the I/O buffers required to scan
# the < 16KB functional_parquet.alltypes files).
SET default_spillable_buffer_size=8m;
SET buffer_pool_limit=47m;
SELECT lag(-180, 13) over (ORDER BY t1.int_col ASC, t2.int_col ASC) AS int_col
FROM functional_parquet.alltypes t1 CROSS JOIN functional_parquet.alltypes t2 LIMIT 10;
---- TYPES
SMALLINT
---- RESULTS
NULL
NULL
NULL
NULL
NULL
NULL
NULL
NULL
NULL
NULL
====
---- QUERY
# IMPALA-2906: Tests that an analytic sort does not attempt to materialize
# a constant expr wrapped in a TupleIsNullPredicate that is not executable
# at the analytic sort. Here 'y' is wrapped in a TupleIsNullPredicate, and
# the analytic sort of the top-level block should not attempt to materialize
# that wrapped constant expr.
SELECT
LEAD(996, 13) OVER (ORDER BY t1.int_col)
FROM functional.alltypestiny t1
LEFT JOIN (
SELECT
v1.x,
(-560) IN (694, 44) AS y
FROM (
SELECT
LAG(bigint_col, 34) OVER (ORDER BY t2.int_col) AS x
FROM functional.alltypestiny t2
) v1
) v2
ON v2.x = t1.bigint_col AND v2.y = t1.bigint_col
GROUP BY t1.int_col
---- TYPES
SMALLINT
---- RESULTS
NULL
NULL
====
---- QUERY
# Start IGNORE NULLS
# Test all possible combinations of bound types for last_value that don't get rewritten.
select id, tinyint_col,
last_value(tinyint_col ignore nulls) over
(order by id rows between unbounded preceding and 1 preceding),
last_value(tinyint_col ignore nulls) over
(order by id rows between unbounded preceding and current row),
last_value(tinyint_col ignore nulls) over
(order by id rows between unbounded preceding and 1 following),
last_value(tinyint_col ignore nulls) over
(order by id rows between unbounded preceding and unbounded following),
last_value(tinyint_col ignore nulls) over
(order by id rows between 2 preceding and 1 preceding),
last_value(tinyint_col ignore nulls) over
(order by id rows between 2 preceding and current row),
last_value(tinyint_col ignore nulls) over
(order by id rows between 1 preceding and 2 following),
last_value(tinyint_col ignore nulls) over
(order by id rows between current row and current row),
last_value(cast(tinyint_col as string) ignore nulls) over
(order by id rows between current row and 1 following),
last_value(cast(tinyint_col as string) ignore nulls) over
(order by id rows between 1 following and 2 following),
last_value(tinyint_col ignore nulls) over
(order by id range between unbounded preceding and current row),
last_value(tinyint_col ignore nulls) over
(order by id range between unbounded preceding and unbounded following)
from functional.alltypesagg where id < 21
---- TYPES
INT, TINYINT, TINYINT, TINYINT, TINYINT, TINYINT, TINYINT, TINYINT, TINYINT, TINYINT, STRING, STRING, TINYINT, TINYINT
---- RESULTS: VERIFY_IS_EQUAL_SORTED
0,NULL,NULL,NULL,NULL,9,NULL,NULL,1,NULL,'NULL','1',NULL,9
0,NULL,NULL,NULL,1,9,NULL,NULL,2,NULL,'1','2',NULL,9
1,1,NULL,1,2,9,NULL,1,3,1,'2','3',1,9
2,2,1,2,3,9,1,2,4,2,'3','4',2,9
3,3,2,3,4,9,2,3,5,3,'4','5',3,9
4,4,3,4,5,9,3,4,6,4,'5','6',4,9
5,5,4,5,6,9,4,5,7,5,'6','7',5,9
6,6,5,6,7,9,5,6,8,6,'7','8',6,9
7,7,6,7,8,9,6,7,9,7,'8','9',7,9
8,8,7,8,9,9,7,8,9,8,'9','9',8,9
9,9,8,9,9,9,8,9,9,9,'9','NULL',9,9
10,NULL,9,9,9,9,9,9,1,NULL,'NULL','1',9,9
10,NULL,9,9,1,9,9,9,2,NULL,'1','2',9,9
11,1,9,1,2,9,NULL,1,3,1,'2','3',1,9
12,2,1,2,3,9,1,2,4,2,'3','4',2,9
13,3,2,3,4,9,2,3,5,3,'4','5',3,9
14,4,3,4,5,9,3,4,6,4,'5','6',4,9
15,5,4,5,6,9,4,5,7,5,'6','7',5,9
16,6,5,6,7,9,5,6,8,6,'7','8',6,9
17,7,6,7,8,9,6,7,9,7,'8','9',7,9
18,8,7,8,9,9,7,8,9,8,'9','9',8,9
19,9,8,9,9,9,8,9,9,9,'9','NULL',9,9
20,NULL,9,9,9,9,9,9,9,NULL,'NULL','NULL',9,9
20,NULL,9,9,9,9,9,9,NULL,NULL,'NULL','NULL',9,9
====
---- QUERY
# Test the remaining combinations of bound types. These are handled separately from above
# because they are rewritten as first_value() functions, so their results can be returned
# in a different order, since there isn't a total ordering on id.
select id, tinyint_col,
last_value(tinyint_col ignore nulls) over
(order by id rows between 1 preceding and unbounded following),
last_value(tinyint_col ignore nulls) over
(order by id rows between current row and unbounded following),
last_value(tinyint_col ignore nulls) over
(order by id rows between 2 following and unbounded following),
last_value(tinyint_col ignore nulls) over
(order by id range between current row and unbounded following)
from functional.alltypesagg where id < 21 order by id
---- TYPES
INT, TINYINT, TINYINT, TINYINT, TINYINT, TINYINT
---- RESULTS: VERIFY_IS_EQUAL_SORTED
0,NULL,9,9,9,9
0,NULL,9,9,9,9
1,1,9,9,9,9
2,2,9,9,9,9
3,3,9,9,9,9
4,4,9,9,9,9
5,5,9,9,9,9
6,6,9,9,9,9
7,7,9,9,9,9
8,8,9,9,9,9
9,9,9,9,9,9
10,NULL,9,9,9,9
10,NULL,9,9,9,9
11,1,9,9,9,9
12,2,9,9,9,9
13,3,9,9,9,9
14,4,9,9,9,9
15,5,9,9,9,9
16,6,9,9,9,9
17,7,9,9,9,9
18,8,9,9,NULL,9
19,9,9,9,NULL,9
20,NULL,9,NULL,NULL,NULL
20,NULL,NULL,NULL,NULL,NULL
====
---- QUERY
# Test all combinations of bound types for first_value that don't get rewritten.
select id, tinyint_col,
first_value(tinyint_col ignore nulls) over
(order by id rows between unbounded preceding and 1 preceding),
first_value(tinyint_col ignore nulls) over
(order by id rows between unbounded preceding and current row),
first_value(cast(tinyint_col as string) ignore nulls) over
(order by id rows between unbounded preceding and 1 following),
first_value(cast(tinyint_col as string) ignore nulls) over
(order by id rows between unbounded preceding and unbounded following),
first_value(tinyint_col ignore nulls) over
(order by id range between unbounded preceding and current row),
first_value(tinyint_col ignore nulls) over
(order by id range between unbounded preceding and unbounded following)
from functional.alltypesagg where id < 21
---- TYPES
INT, TINYINT, TINYINT, TINYINT, STRING, STRING, TINYINT, TINYINT
---- RESULTS: VERIFY_IS_EQUAL_SORTED
0,NULL,NULL,NULL,'NULL','1',NULL,1
0,NULL,NULL,NULL,'1','1',NULL,1
1,1,NULL,1,'1','1',1,1
2,2,1,1,'1','1',1,1
3,3,1,1,'1','1',1,1
4,4,1,1,'1','1',1,1
5,5,1,1,'1','1',1,1
6,6,1,1,'1','1',1,1
7,7,1,1,'1','1',1,1
8,8,1,1,'1','1',1,1
9,9,1,1,'1','1',1,1
10,NULL,1,1,'1','1',1,1
10,NULL,1,1,'1','1',1,1
11,1,1,1,'1','1',1,1
12,2,1,1,'1','1',1,1
13,3,1,1,'1','1',1,1
14,4,1,1,'1','1',1,1
15,5,1,1,'1','1',1,1
16,6,1,1,'1','1',1,1
17,7,1,1,'1','1',1,1
18,8,1,1,'1','1',1,1
19,9,1,1,'1','1',1,1
20,NULL,1,1,'1','1',1,1
20,NULL,1,1,'1','1',1,1
====
---- QUERY
# Test combinations of bound types for first_value that get rewritten as last_value.
select id, tinyint_col,
first_value(tinyint_col ignore nulls) over
(order by id rows between 2 preceding and 1 preceding),
first_value(tinyint_col ignore nulls) over
(order by id rows between 1 preceding and current row),
first_value(tinyint_col ignore nulls) over
(order by id rows between 1 preceding and 1 following),
first_value(tinyint_col ignore nulls) over
(order by id rows between 1 preceding and unbounded following),
first_value(tinyint_col ignore nulls) over
(order by id rows between current row and current row),
first_value(tinyint_col ignore nulls) over
(order by id rows between current row and 1 following),
first_value(tinyint_col ignore nulls) over
(order by id rows between current row and unbounded following),
first_value(tinyint_col ignore nulls) over
(order by id rows between 1 following and 2 following),
first_value(tinyint_col ignore nulls) over
(order by id rows between 2 following and unbounded following),
first_value(tinyint_col ignore nulls) over
(order by id range between current row and unbounded following)
from functional.alltypesagg where id < 21
---- TYPES
INT, TINYINT, TINYINT, TINYINT, TINYINT, TINYINT, TINYINT, TINYINT, TINYINT, TINYINT, TINYINT, TINYINT
---- RESULTS: VERIFY_IS_EQUAL_SORTED
0,NULL,NULL,NULL,NULL,1,NULL,NULL,1,1,1,1
0,NULL,NULL,NULL,1,1,NULL,1,1,1,2,1
1,1,NULL,1,1,1,1,1,1,2,3,1
2,2,1,1,1,1,2,2,2,3,4,2
3,3,1,2,2,2,3,3,3,4,5,3
4,4,2,3,3,3,4,4,4,5,6,4
5,5,3,4,4,4,5,5,5,6,7,5
6,6,4,5,5,5,6,6,6,7,8,6
7,7,5,6,6,6,7,7,7,8,9,7
8,8,6,7,7,7,8,8,8,9,1,8
9,9,7,8,8,8,9,9,9,NULL,1,9
10,NULL,8,9,9,9,NULL,NULL,1,1,1,1
10,NULL,9,NULL,1,1,NULL,1,1,1,2,1
11,1,NULL,1,1,1,1,1,1,2,3,1
12,2,1,1,1,1,2,2,2,3,4,2
13,3,1,2,2,2,3,3,3,4,5,3
14,4,2,3,3,3,4,4,4,5,6,4
15,5,3,4,4,4,5,5,5,6,7,5
16,6,4,5,5,5,6,6,6,7,8,6
17,7,5,6,6,6,7,7,7,8,9,7
18,8,6,7,7,7,8,8,8,9,NULL,8
19,9,7,8,8,8,9,9,9,NULL,NULL,9
20,NULL,8,9,9,9,NULL,NULL,NULL,NULL,NULL,NULL
20,NULL,9,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL
====
---- QUERY
# Test all possible types for last_value (except decimal and date).
select id,
last_value(bool_col ignore nulls) over (order by id),
last_value(tinyint_col ignore nulls) over (order by id),
last_value(smallint_col ignore nulls) over (order by id),
last_value(int_col ignore nulls) over (order by id),
last_value(bigint_col ignore nulls) over (order by id),
last_value(float_col ignore nulls) over (order by id),
last_value(double_col ignore nulls) over (order by id),
last_value(string_col ignore nulls) over (order by id),
last_value(timestamp_col ignore nulls) over (order by id)
from functional.alltypesagg where id < 5
---- TYPES
INT, BOOLEAN, TINYINT, SMALLINT, INT, BIGINT, FLOAT, DOUBLE, STRING, TIMESTAMP
---- RESULTS: VERIFY_IS_EQUAL_SORTED
0,true,NULL,NULL,NULL,NULL,NULL,NULL,'0',2010-01-01 00:00:00
0,true,NULL,NULL,NULL,NULL,NULL,NULL,'0',2010-01-01 00:00:00
1,false,1,1,1,10,1.100000023841858,10.1,'1',2010-01-01 00:01:00
2,true,2,2,2,20,2.200000047683716,20.2,'2',2010-01-01 00:02:00.100000000
3,false,3,3,3,30,3.299999952316284,30.3,'3',2010-01-01 00:03:00.300000000
4,true,4,4,4,40,4.400000095367432,40.4,'4',2010-01-01 00:04:00.600000000
---- DBAPI_RESULTS: VERIFY_IS_EQUAL_SORTED
0,true,NULL,NULL,NULL,NULL,NULL,NULL,'0',2010-01-01 00:00:00
0,true,NULL,NULL,NULL,NULL,NULL,NULL,'0',2010-01-01 00:00:00
1,false,1,1,1,10,1.100000023841858,10.1,'1',2010-01-01 00:01:00
2,true,2,2,2,20,2.200000047683716,20.2,'2',2010-01-01 00:02:00.100000
3,false,3,3,3,30,3.299999952316284,30.3,'3',2010-01-01 00:03:00.300000
4,true,4,4,4,40,4.400000095367432,40.4,'4',2010-01-01 00:04:00.600000
====
---- QUERY
# Test all possible types for first_value (except decimal).
select id,
first_value(bool_col ignore nulls) over (order by id),
first_value(tinyint_col ignore nulls) over (order by id),
first_value(smallint_col ignore nulls) over (order by id),
first_value(int_col ignore nulls) over (order by id),
first_value(bigint_col ignore nulls) over (order by id),
first_value(float_col ignore nulls) over (order by id),
first_value(double_col ignore nulls) over (order by id),
first_value(string_col ignore nulls) over (order by id),
first_value(timestamp_col ignore nulls) over (order by id)
from functional.alltypesagg where id < 5
---- TYPES
INT, BOOLEAN, TINYINT, SMALLINT, INT, BIGINT, FLOAT, DOUBLE, STRING, TIMESTAMP
---- RESULTS: VERIFY_IS_EQUAL_SORTED
0,true,NULL,NULL,NULL,NULL,NULL,NULL,'0',2010-01-01 00:00:00
0,true,NULL,NULL,NULL,NULL,NULL,NULL,'0',2010-01-01 00:00:00
1,true,1,1,1,10,1.100000023841858,10.1,'0',2010-01-01 00:00:00
2,true,1,1,1,10,1.100000023841858,10.1,'0',2010-01-01 00:00:00
3,true,1,1,1,10,1.100000023841858,10.1,'0',2010-01-01 00:00:00
4,true,1,1,1,10,1.100000023841858,10.1,'0',2010-01-01 00:00:00
====
---- QUERY
select
last_value(d1 ignore nulls) over (order by d1),
first_value(d1 ignore nulls) over (order by d1)
from functional.decimal_tbl
---- TYPES
DECIMAL, DECIMAL
---- RESULTS: VERIFY_IS_EQUAL_SORTED
1234,1234
2345,1234
12345,1234
12345,1234
132842,1234
====
---- QUERY
select
last_value(date_col ignore nulls) over (order by date_col),
first_value(date_col ignore nulls) over (order by date_col)
from functional.date_tbl
---- TYPES
DATE, DATE
---- RESULTS: VERIFY_IS_EQUAL_SORTED
0001-01-01,0001-01-01
0001-06-21,0001-01-01
0001-06-22,0001-01-01
0001-06-23,0001-01-01
0001-06-24,0001-01-01
0001-06-25,0001-01-01
0001-06-26,0001-01-01
0001-06-27,0001-01-01
0001-06-28,0001-01-01
0001-06-29,0001-01-01
0001-12-31,0001-01-01
0002-01-01,0001-01-01
1399-12-31,0001-01-01
2017-11-28,0001-01-01
2017-11-28,0001-01-01
2017-11-28,0001-01-01
2018-12-31,0001-01-01
9999-12-01,0001-01-01
9999-12-31,0001-01-01
9999-12-31,0001-01-01
9999-12-31,0001-01-01
9999-12-31,0001-01-01
====
---- QUERY
# Regression test for IMPALA-4120: Invoke UDFs which do local allocations and verifies
# that the results are copied out.
select count(*) from (
select
from_unixtime(lead(bigint_col, 1) over (order by id), 'yyyyMMddHH:mm:ss') as a,
lead(from_unixtime(bigint_col, 'yyyyMMddHH:mm:ss'), 1) over (order by id) AS b
from functional.alltypes) x
where x.a = x.b
---- TYPES
BIGINT
---- ERRORS
---- RESULTS
7299
====
---- QUERY
# Regression test for IMPALA-5623: lag() should free UDF allocated memory.
# i.e. the fix for IMPALA-4120 should apply to lag() as well.
select count(*) from (
select
from_unixtime(lag(bigint_col, 1) over (order by id), 'yyyyMMddHH:mm:ss') as a,
lag(from_unixtime(bigint_col, 'yyyyMMddHH:mm:ss'), 1) over (order by id) AS b
from functional.alltypes) x
where x.a = x.b
---- TYPES
BIGINT
---- ERRORS
---- RESULTS
7299
====
---- QUERY
# Regression test for IMPALA-4518: Distinguishes between empty string and null string.
select f,lead(b,1,null) over (order by f)
from (select * from nulltable union all select * from nulltable) x;
---- TYPES
STRING, STRING
---- RESULTS
'a\x00b',''
'a\x00b','NULL'
====
---- QUERY
# Test for IMPALA-5657: Ignore nulls should be applied correctly on views.
# Without the fix for IMPALA-5657, the select * query on the view does not
# apply ignore nulls and returns a few NULLs in the query result.
create view if not exists imp5657_view as
select
last_value(tinyint_col ignore nulls) over (order by tinyint_col)
from functional.alltypesagg
where id < 5;
====
---- QUERY
select * from imp5657_view;
---- TYPES
TINYINT
---- RESULTS
1
2
3
4
4
4
====
---- QUERY
# Test for IMPALA-1882: first_value works without order by clause. Therefore, output of
# function depends on order of input rows, which in this case is DESC
select id, bool_col, first_value(id) over (partition by bool_col) from
(select * from functional.alltypessmall order by id desc limit 5) as t1
---- TYPES
INT, BOOLEAN, INT
---- RESULTS
99,false,99
97,false,99
95,false,99
98,true,98
96,true,98
====
---- QUERY
# Test for IMPALA-1882: first_value works without order by clause. Therefore, output of
# function depends on order of input rows, which in this case is ASC
select id, bool_col, first_value(id) over (partition by bool_col) from
(select * from functional.alltypessmall order by id asc limit 5) as t1
---- TYPES
INT, BOOLEAN, INT
---- RESULTS
1,false,1
3,false,1
0,true,0
2,true,0
4,true,0
---- QUERY
# Test for IMPALA-1882: last_value works without order by clause. Therefore, output of
# function depends on order of input rows, which in this case is DESC
select id, bool_col, last_value(id) over (partition by bool_col) from
(select * from functional.alltypessmall order by id desc limit 5) as t1
---- TYPES
INT, BOOLEAN, INT
---- RESULTS
99,false,95
97,false,95
95,false,95
98,true,96
96,true,96
====
---- QUERY
# Test for IMPALA-1882: last_value works without order by clause. Therefore, output of
# function depends on order of input rows, which in this case is ASC
select id, bool_col, last_value(id) over (partition by bool_col) from
(select * from functional.alltypessmall order by id asc limit 5) as t1
---- TYPES
INT, BOOLEAN, INT
---- RESULTS
1,false,3
3,false,3
0,true,4
2,true,4
4,true,4
====
---- QUERY
# Test for IMPALA-1882: first_value works with ignore nulls and without order by clause.
# Therefore, output of function depends on order of input rows, which in this case is ASC
select bool_col, smallint_col, first_value(smallint_col ignore nulls) over
(partition by bool_col) from (select * from functional.alltypesagg where
id > 99 order by id asc limit 10) as t1
---- TYPES
BOOLEAN, SMALLINT, SMALLINT
---- RESULTS
false,1,1
false,3,1
false,5,1
false,7,1
true,NULL,2
true,NULL,2
true,2,2
true,4,2
true,6,2
true,8,2
====
---- QUERY
# Test for IMPALA-1882: first_value works with ignore nulls and without order by clause.
# Therefore, output of function depends on order of input rows, which in this case is DESC
select bool_col, smallint_col, first_value(smallint_col ignore nulls) over
(partition by bool_col) from (select * from functional.alltypesagg where
id < 101 order by id desc limit 10) as t1;
---- TYPES
BOOLEAN, SMALLINT, SMALLINT
---- RESULTS
false,99,99
false,97,99
false,95,99
false,93,99
true,NULL,98
true,NULL,98
true,98,98
true,96,98
true,94,98
true,92,98
====
---- QUERY
# Test for IMPALA-1882: last_value works with ignore nulls and without order by clause.
# Therefore, output of function depends on order of input rows, which in this case is ASC
select bool_col, smallint_col, last_value(smallint_col ignore nulls) over
(partition by bool_col) from (select * from functional.alltypesagg where
id > 99 order by id asc limit 10) as t1;
---- TYPES
BOOLEAN, SMALLINT, SMALLINT
---- RESULTS
false,1,7
false,3,7
false,5,7
false,7,7
true,NULL,8
true,NULL,8
true,2,8
true,4,8
true,6,8
true,8,8
====
---- QUERY
# Test for IMPALA-1882: last_value works with ignore nulls and without order by clause.
# Therefore, output of function depends on order of input rows, which in this case is DESC
select bool_col, smallint_col, last_value(smallint_col ignore nulls) over
(partition by bool_col) from (select * from functional.alltypesagg where
id < 110 order by id desc limit 10) as t1;
---- TYPES
BOOLEAN, SMALLINT, SMALLINT
---- RESULTS
false,9,1
false,7,1
false,5,1
false,3,1
false,1,1
true,8,2
true,6,2
true,4,2
true,2,2
true,NULL,2
====
---- QUERY
# IMPALA-6323 Partition by a constant is equivalent to no partitioning.
select x, count() over(partition by 1) from (VALUES((1 x), (2), (3))) T;
---- TYPES
TINYINT, BIGINT
---- RESULTS
1,3
2,3
3,3
====
---- QUERY
# IMPALA-6323 Order by a constant is equivalent to no ordering.
select x, count() over(order by 1) from (VALUES((1 x), (2), (3))) T;
---- TYPES
TINYINT, BIGINT
---- RESULTS
1,3
2,3
3,3
====
---- QUERY
# IMPALA-6323 Order by a constant is equivalent to no ordering.
SELECT FIRST_VALUE(0) OVER (ORDER BY 0 ASC)
FROM functional.alltypestiny;
---- TYPES
TINYINT
---- RESULTS
0
0
0
0
0
0
0
0
====