blob: 8839edba0cb025bf7e9ecee83addc6a4d560c51b [file] [log] [blame]
====
---- QUERY
# test a larger dataset, includes nulls
# the exact result could vary slightly due to numeric instability
# 0.001 is a conservative upperbound on the possible difference in results
SELECT abs(cast(variance(tinyint_col) as double) - 6.66741) < 0.001,
abs(cast(variance(double_col) as double) - 8470806.708) < 0.001
from alltypesagg
---- RESULTS
true,true
---- TYPES
boolean, boolean
====
---- QUERY
# No tuples processed (should return null)
SELECT variance(tinyint_col), stddev(smallint_col), variance_pop(int_col),
stddev_pop(bigint_col)
from alltypesagg WHERE id = -9999999
---- RESULTS
NULL,NULL,NULL,NULL
---- TYPES
double, double, double, double
====
---- QUERY
# exactly 1 tuple processed (variance_pop & stddev_pop are 0, stddev and variance
# are NULL)
SELECT variance(tinyint_col), variance_samp(smallint_col), variance_pop(int_col),
stddev(smallint_col), stddev_samp(smallint_col), stddev_pop(bigint_col)
from alltypesagg WHERE id = 1006
---- RESULTS
NULL,NULL,0,NULL,NULL,0
---- TYPES
double, double, double, double, double, double
====
---- QUERY
# Includes one row which is null, and test the aliases for variance() as well
SELECT variance(tinyint_col), variance(smallint_col), variance(int_col),
variance(bigint_col), variance(float_col), variance(double_col),
var_samp(double_col), variance_samp(double_col)
from alltypesagg WHERE id >= 1000 AND id < 1006
---- RESULTS
2.5,2.5,2.5,250,3.025,255.025,255.025,255.025
---- TYPES
double, double, double, double, double, double,double, double
====
---- QUERY
# Test population variance (including the var_pop() alias)
SELECT variance_pop(tinyint_col), variance_pop(smallint_col), variance_pop(int_col),
variance_pop(bigint_col), variance_pop(float_col), variance_pop(double_col),
var_pop(double_col)
from alltypesagg WHERE id >= 1000 AND id < 1006
---- RESULTS
2,2,2,200,2.42,204.02,204.02
---- TYPES
double, double, double, double, double, double, double
====
---- QUERY
SELECT round(stddev(tinyint_col), 5),
round(stddev(smallint_col), 5),
round(stddev(int_col), 5),
round(stddev(bigint_col), 5),
round(stddev(float_col), 5),
round(stddev(double_col), 5),
round(stddev_samp(double_col), 5)
from alltypesagg WHERE id >= 1000 AND id < 1006
---- RESULTS
1.58114,1.58114,1.58114,15.81139,1.73925,15.96950,15.96950
---- TYPES
double, double, double, double, double, double, double
====
---- QUERY
# no grouping exprs, cols contain nulls except for bool cols
SELECT round(stddev_pop(tinyint_col), 5),
round(stddev_pop(smallint_col), 5),
round(stddev_pop(int_col), 5),
round(stddev_pop(bigint_col), 5),
round(stddev_pop(float_col), 5),
round(stddev_pop(double_col), 5)
from alltypesagg WHERE id >= 1000 AND id < 1006
---- RESULTS
1.41421,1.41421,1.41421,14.14214,1.55563,14.28356
---- TYPES
double, double, double, double, double, double
====
---- QUERY
# no grouping exprs, cols contain nulls except for bool cols
select count(bool_col), min(bool_col), max(bool_col)
from alltypesagg where day is not null
---- RESULTS
10000,false,true
---- TYPES
bigint, boolean, boolean
====
---- QUERY
# no grouping exprs, cols contain nulls
select count(*), count(tinyint_col), min(tinyint_col), max(tinyint_col), sum(tinyint_col),
avg(tinyint_col)
from alltypesagg where day is not null
---- RESULTS
10000,9000,1,9,45000,5
---- TYPES
bigint, bigint, tinyint, tinyint, bigint, double
====
---- QUERY
select count(*), count(smallint_col), min(smallint_col), max(smallint_col), sum(smallint_col),
avg(smallint_col)
from alltypesagg where day is not null
---- RESULTS
10000,9900,1,99,495000,50
---- TYPES
bigint, bigint, smallint, smallint, bigint, double
====
---- QUERY
select count(*), count(int_col), min(int_col), max(int_col), sum(int_col), avg(int_col)
from alltypesagg where day is not null
---- RESULTS
10000,9990,1,999,4995000,500
---- TYPES
bigint, bigint, int, int, bigint, double
====
---- QUERY
select count(*), count(bigint_col), min(bigint_col), max(bigint_col), sum(bigint_col),
avg(bigint_col)
from alltypesagg where day is not null
---- RESULTS
10000,9990,10,9990,49950000,5000
---- TYPES
bigint, bigint, bigint, bigint, bigint, double
====
---- QUERY
select count(*), count(float_col), min(float_col), max(float_col), sum(float_col),
avg(float_col)
from alltypesagg where day is not null
---- RESULTS
10000,9990,1.100000023841858,1098.900024414062,5494499.999767542,549.9999999767309
---- TYPES
bigint, bigint, float, float, double, double
====
---- QUERY
select count(*), count(double_col), min(double_col), max(double_col), round(sum(double_col), 0),
round(avg(double_col), 0)
from alltypesagg where day is not null
---- RESULTS
10000,9990,10.1,10089.9,50449500,5050
---- TYPES
bigint, bigint, double, double, double, double
====
---- QUERY
select count(*), min(string_col), max(string_col), min(date_string_col),
max(date_string_col)
from alltypesagg where day is not null
---- RESULTS
10000,'0','999','01/01/10','01/10/10'
---- TYPES
bigint, string, string, string, string
====
---- QUERY
# Test for IMPALA-3018. Verify update() functions of min() and max() handle
# zero-length string correctly.
select max(str), min(str) from (values ('aaa' as str), (''), ('123')) as tmp
---- RESULTS
'aaa',''
---- TYPES
string,string
====
---- QUERY
# Test for IMPALA-3018. Verify update() function of last_value() handles
# zero-length string correctly.
select last_value(b) over (partition by a order by d) from functional.nulltable;
---- RESULTS
''
---- TYPES
string
====
---- QUERY
# Test for IMPALA-3018. Verify update() function of first_value() handles
# zero-length string correctly.
select first_value(b) over (partition by a order by d) from functional.nulltable;
---- RESULTS
''
---- TYPES
string
====
---- QUERY
# grouping by different data types, with NULLs
select tinyint_col, count(*) from alltypesagg where day is not null group by 1 order by 1
---- RESULTS
1,1000
2,1000
3,1000
4,1000
5,1000
6,1000
7,1000
8,1000
9,1000
NULL,1000
---- TYPES
tinyint, bigint
====
---- QUERY
# grouping by different data types, with NULLs, grouping expr missing from select list
select bool_col,min(bool_col),max(bool_col) from alltypesagg where day is not null group by 1
---- RESULTS
false,false,false
true,true,true
---- TYPES
boolean,boolean,boolean
====
---- QUERY
select count(*) from alltypesagg where day is not null group by tinyint_col
---- RESULTS
1000
1000
1000
1000
1000
1000
1000
1000
1000
1000
---- TYPES
bigint
====
---- QUERY
select smallint_col % 10, count(*) from alltypesagg where day is not null group by 1
---- RESULTS
3,1000
NULL,100
8,1000
7,1000
0,900
6,1000
9,1000
5,1000
4,1000
1,1000
2,1000
---- TYPES
smallint, bigint
====
---- QUERY
select count(*) from alltypesagg where day is not null group by smallint_col % 10
---- RESULTS
1000
100
1000
1000
900
1000
1000
1000
1000
1000
1000
---- TYPES
bigint
====
---- QUERY
select int_col % 10, count(*) from alltypesagg where day is not null group by 1
---- RESULTS
4,1000
9,1000
NULL,10
6,1000
5,1000
2,1000
0,990
1,1000
3,1000
8,1000
7,1000
---- TYPES
int, bigint
====
---- QUERY
select count(*) from alltypesagg where day is not null group by int_col % 10
---- RESULTS
1000
1000
10
1000
1000
1000
990
1000
1000
1000
1000
---- TYPES
bigint
====
---- QUERY
# Check that ALL inside aggregates is correct
select count(ALL *) from alltypesagg where day is not null group by int_col % 10
---- RESULTS
1000
1000
10
1000
1000
1000
990
1000
1000
1000
1000
---- TYPES
bigint
====
---- QUERY
select bigint_col % 100, count(*) from alltypesagg where day is not null group by 1
---- RESULTS
60,1000
70,1000
20,1000
NULL,10
40,1000
80,1000
30,1000
0,990
50,1000
90,1000
10,1000
---- TYPES
bigint, bigint
====
---- QUERY
select count(*) from alltypesagg where day is not null group by bigint_col % 100
---- RESULTS
1000
1000
1000
10
1000
1000
1000
990
1000
1000
1000
---- TYPES
bigint
====
---- QUERY
select float_col, float_col * 2, count(*) from alltypes group by 1, 2
---- RESULTS
0,0,730
3.299999952316284,6.599999904632568,730
8.800000190734863,17.60000038146973,730
6.599999904632568,13.19999980926514,730
7.699999809265137,15.39999961853027,730
2.200000047683716,4.400000095367432,730
5.5,11,730
1.100000023841858,2.200000047683716,730
9.899999618530273,19.79999923706055,730
4.400000095367432,8.800000190734863,730
---- TYPES
float, double, bigint
====
---- QUERY
select count(*) from alltypes group by float_col
---- RESULTS
730
730
730
730
730
730
730
730
730
730
---- TYPES
bigint
====
---- QUERY
select float_col, count(*) from alltypesagg where float_col is null and day is not null group by 1
---- RESULTS
NULL,10
---- TYPES
float, bigint
====
---- QUERY
select double_col, double_col * 2, count(*) from alltypes group by 1, 2
---- RESULTS
0,0,730
90.90000000000001,181.8,730
40.4,80.8,730
20.2,40.4,730
80.8,161.6,730
10.1,20.2,730
70.7,141.4,730
50.5,101,730
30.3,60.6,730
60.6,121.2,730
---- TYPES
double, double, bigint
====
---- QUERY
select count(*) from alltypes group by double_col
---- RESULTS
730
730
730
730
730
730
730
730
730
730
---- TYPES
bigint
====
---- QUERY
select double_col, count(*) from alltypesagg where double_col is null and day is not null group by 1
---- RESULTS
NULL,10
---- TYPES
double, bigint
====
---- QUERY
select date_string_col, count(*) from alltypesagg where day is not null group by 1
---- RESULTS
'01/08/10',1000
'01/09/10',1000
'01/02/10',1000
'01/06/10',1000
'01/01/10',1000
'01/03/10',1000
'01/04/10',1000
'01/10/10',1000
'01/07/10',1000
'01/05/10',1000
---- TYPES
string, bigint
====
---- QUERY
select count(*) from alltypesagg where day is not null group by date_string_col
---- RESULTS
1000
1000
1000
1000
1000
1000
1000
1000
1000
1000
---- TYPES
bigint
====
---- QUERY
# grouping by multiple exprs, with nulls
select tinyint_col % 3, smallint_col % 3, count(*) from alltypesagg
where day = 1 group by 1, 2
---- RESULTS
0,0,120
0,1,90
0,2,90
1,0,90
1,1,120
1,2,90
2,0,90
2,1,90
2,2,120
NULL,0,30
NULL,1,30
NULL,2,30
NULL,NULL,10
---- TYPES
tinyint, smallint, bigint
====
---- QUERY
select count(*) from alltypesagg
where day = 1 group by tinyint_col % 3, smallint_col % 3
---- RESULTS
10
120
120
120
30
30
30
90
90
90
90
90
90
---- TYPES
bigint
====
---- QUERY
# same result as previous query
select tinyint_col % 3, smallint_col % 3, count(*) from alltypesagg where day = 1 group by 2, 1
---- RESULTS
0,0,120
0,1,90
0,2,90
1,0,90
1,1,120
1,2,90
2,0,90
2,1,90
2,2,120
NULL,0,30
NULL,1,30
NULL,2,30
NULL,NULL,10
---- TYPES
tinyint, smallint, bigint
====
---- QUERY
select tinyint_col % 2, smallint_col % 2, int_col % 2, bigint_col % 2, date_string_col, count(*)
from alltypesagg
where (date_string_col = '01/01/10' or date_string_col = '01/02/10') and day is not null
group by 1, 2, 3, 4, 5
---- RESULTS
1,1,1,0,'01/02/10',500
0,0,0,0,'01/02/10',400
NULL,NULL,0,0,'01/02/10',9
NULL,NULL,NULL,NULL,'01/02/10',1
0,0,0,0,'01/01/10',400
NULL,NULL,0,0,'01/01/10',9
NULL,NULL,NULL,NULL,'01/01/10',1
NULL,0,0,0,'01/02/10',90
1,1,1,0,'01/01/10',500
NULL,0,0,0,'01/01/10',90
---- TYPES
tinyint, smallint, int, bigint, string, bigint
====
---- QUERY
select count(*)
from alltypesagg
where (date_string_col = '01/01/10' or date_string_col = '01/02/10') and day is not null
group by tinyint_col % 2, smallint_col % 2, int_col % 2, bigint_col % 2, date_string_col
---- RESULTS
500
400
9
1
400
9
1
90
500
90
---- TYPES
bigint
====
---- QUERY
# no grouping cols, no matching rows
select count(*), min(tinyint_col), max(tinyint_col), sum(tinyint_col), avg(tinyint_col)
from alltypesagg
where tinyint_col = -1 and day is not null
---- RESULTS
0,NULL,NULL,NULL,NULL
---- TYPES
bigint, tinyint, tinyint, bigint, double
====
---- QUERY
select count(*), min(smallint_col), max(smallint_col), sum(smallint_col), avg(smallint_col)
from alltypesagg
where smallint_col = -1 and day is not null
---- RESULTS
0,NULL,NULL,NULL,NULL
---- TYPES
bigint, smallint, smallint, bigint, double
====
---- QUERY
select count(*), min(int_col), max(int_col), sum(int_col), avg(int_col)
from alltypesagg
where int_col = -1 and day is not null
---- RESULTS
0,NULL,NULL,NULL,NULL
---- TYPES
bigint, int, int, bigint, double
====
---- QUERY
select count(*), min(bigint_col), max(bigint_col), sum(bigint_col), avg(bigint_col)
from alltypesagg
where bigint_col = -1 and day is not null
---- RESULTS
0,NULL,NULL,NULL,NULL
---- TYPES
bigint, bigint, bigint, bigint, double
====
---- QUERY
select count(*), min(float_col), max(float_col), sum(float_col), avg(float_col)
from alltypesagg
where float_col < -1.0 and day is not null
---- RESULTS
0,NULL,NULL,NULL,NULL
---- TYPES
bigint, float, float, double, double
====
---- QUERY
select count(*), min(double_col), max(double_col), sum(double_col), avg(double_col)
from alltypesagg
where double_col < -1.0 and day is not null
---- RESULTS
0,NULL,NULL,NULL,NULL
---- TYPES
bigint, double, double, double, double
====
---- QUERY
# HAVING clauses over all aggregation functions, plus compound HAVING clauses
select int_col % 7, count(*), max(int_col) from alltypesagg where day is not null group by 1
---- RESULTS
4,1430,998
NULL,10,NULL
6,1420,993
5,1430,999
2,1430,996
0,1420,994
1,1430,995
3,1430,997
---- TYPES
int, bigint, int
====
---- QUERY
select int_col % 7, count(*) from alltypesagg where day is not null group by 1 having max(int_col) > 991
---- RESULTS
4,1430
6,1420
5,1430
2,1430
0,1420
1,1430
3,1430
---- TYPES
int, bigint
====
---- QUERY
select int_col % 7, count(*) from alltypesagg where day is not null group by 1
having max(int_col) > 991 and count(*) > 1420
---- RESULTS
4,1430
5,1430
2,1430
1,1430
3,1430
---- TYPES
int, bigint
====
---- QUERY
select int_col % 7, count(*) from alltypesagg where day is not null group by 1
having min(int_col) < 7
---- RESULTS
4,1430
6,1420
5,1430
2,1430
1,1430
3,1430
---- TYPES
int, bigint
====
---- QUERY
select int_col % 7, count(*) from alltypesagg where day is not null group by 1
having min(int_col) < 7 and count(*) > 1420
---- RESULTS
4,1430
5,1430
2,1430
1,1430
3,1430
---- TYPES
int, bigint
====
---- QUERY
select int_col % 7, count(*), sum(int_col) from alltypesagg where day is not null group by 1
---- RESULTS
4,1430,716430
NULL,10,NULL
6,1420,709290
5,1430,717860
2,1430,713570
0,1420,710710
1,1430,712140
3,1430,715000
---- TYPES
int, bigint, bigint
====
---- QUERY
select int_col % 7, count(*), sum(int_col) from alltypesagg where day is not null group by 1
having sum(int_col) >= 715000
---- RESULTS
4,1430,716430
5,1430,717860
3,1430,715000
---- TYPES
int, bigint, bigint
====
---- QUERY
select int_col % 7, count(*), sum(int_col) from alltypesagg where day is not null group by 1
having sum(int_col) >= 715000 or count(*) > 1420
---- RESULTS
4,1430,716430
5,1430,717860
2,1430,713570
1,1430,712140
3,1430,715000
---- TYPES
int, bigint, bigint
====
---- QUERY
select int_col % 7, count(*), sum(int_col) from alltypesagg where day is not null group by 1
having sum(int_col) is null
---- RESULTS
NULL,10,NULL
---- TYPES
int, bigint, bigint
====
---- QUERY
select int_col % 7, count(*), avg(int_col) from alltypesagg where day is not null group by 1
---- RESULTS
4,1430,501
NULL,10,NULL
6,1420,499.5
5,1430,502
2,1430,499
0,1420,500.5
1,1430,498
3,1430,500
---- TYPES
int, bigint, double
====
---- QUERY
select int_col % 7, count(*), avg(int_col) from alltypesagg where day is not null group by 1
having avg(int_col) > 500
---- RESULTS
4,1430,501
5,1430,502
0,1420,500.5
---- TYPES
int, bigint, double
====
---- QUERY
select int_col % 7, count(*), avg(int_col) from alltypesagg where day is not null group by 1
having avg(int_col) > 500 or count(*) = 10
---- RESULTS
4,1430,501
NULL,10,NULL
5,1430,502
0,1420,500.5
---- TYPES
int, bigint, double
====
---- QUERY
select timestamp_col, count(*) from alltypesagg where day is not null
group by timestamp_col having timestamp_col < cast('2010-01-01 01:05:20' as timestamp)
---- RESULTS
2010-01-01 00:49:11.760000000,1
2010-01-01 01:01:18.300000000,1
2010-01-01 00:17:01.360000000,1
2010-01-01 00:58:16.530000000,1
2010-01-01 00:09:00.360000000,1
2010-01-01 00:00:00,1
2010-01-01 01:00:17.700000000,1
2010-01-01 00:57:15.960000000,1
2010-01-01 00:24:02.760000000,1
2010-01-01 00:23:02.530000000,1
2010-01-01 00:45:09.900000000,1
2010-01-01 00:39:07.410000000,1
2010-01-01 00:33:05.280000000,1
2010-01-01 00:03:00.300000000,1
2010-01-01 00:20:01.900000000,1
2010-01-01 00:36:06.300000000,1
2010-01-01 00:44:09.460000000,1
2010-01-01 00:14:00.910000000,1
2010-01-01 00:31:04.650000000,1
2010-01-01 00:48:11.280000000,1
2010-01-01 01:03:19.530000000,1
2010-01-01 00:29:04.600000000,1
2010-01-01 01:02:18.910000000,1
2010-01-01 00:16:01.200000000,1
2010-01-01 00:47:10.810000000,1
2010-01-01 00:51:12.750000000,1
2010-01-01 00:55:14.850000000,1
2010-01-01 00:42:08.610000000,1
2010-01-01 00:56:15.400000000,1
2010-01-01 00:05:00.100000000,1
2010-01-01 00:43:09.300000000,1
2010-01-01 00:28:03.780000000,1
2010-01-01 00:04:00.600000000,1
2010-01-01 00:54:14.310000000,1
2010-01-01 00:26:03.250000000,1
2010-01-01 00:32:04.960000000,1
2010-01-01 00:46:10.350000000,1
2010-01-01 00:37:06.660000000,1
2010-01-01 00:50:12.250000000,1
2010-01-01 00:27:03.510000000,1
2010-01-01 00:19:01.710000000,1
2010-01-01 00:40:07.800000000,1
2010-01-01 00:07:00.210000000,1
2010-01-01 00:22:02.310000000,1
2010-01-01 00:21:02.100000000,1
2010-01-01 00:18:01.530000000,1
2010-01-01 00:11:00.550000000,1
2010-01-01 00:35:05.950000000,1
2010-01-01 00:30:04.350000000,1
2010-01-01 00:08:00.280000000,1
2010-01-01 00:34:05.610000000,1
2010-01-01 00:15:01.500000000,1
2010-01-01 00:41:08.200000000,1
2010-01-01 00:02:00.100000000,1
2010-01-01 00:01:00,1
2010-01-01 00:10:00.450000000,1
2010-01-01 00:52:13.260000000,1
2010-01-01 01:04:20.160000000,1
2010-01-01 00:12:00.660000000,1
2010-01-01 00:38:07.300000000,1
2010-01-01 00:53:13.780000000,1
2010-01-01 00:25:03,1
2010-01-01 00:59:17.110000000,1
2010-01-01 00:06:00.150000000,1
2010-01-01 00:13:00.780000000,1
---- TYPES
timestamp, bigint
====
---- QUERY
# Test NULLs in aggregate functions
select count(NULL), min(NULL), max(NULL), sum(NULL), avg(NULL) from alltypesagg
where day is not null
---- RESULTS
0,NULL,NULL,NULL,NULL
---- TYPES
bigint, boolean, boolean, bigint, double
====
---- QUERY
# Test ignored distinct in MIN and MAX with NULLs
select min(distinct NULL), max(distinct NULL) from alltypes
---- RESULTS
NULL,NULL
---- TYPES
boolean, boolean
====
---- QUERY
# Test group_concat with default delimiter. Use a subquery with an ORDER BY to
# ensure group_concat results are in a deterministic order.
select day, group_concat(string_col)
from (select * from alltypesagg where id % 100 = day order by id limit 99999) a
group by day
---- RESULTS: VERIFY_IS_EQUAL_SORTED
3,'3, 103, 203, 303, 403, 503, 603, 703, 803, 903'
5,'5, 105, 205, 305, 405, 505, 605, 705, 805, 905'
8,'8, 108, 208, 308, 408, 508, 608, 708, 808, 908'
4,'4, 104, 204, 304, 404, 504, 604, 704, 804, 904'
9,'9, 109, 209, 309, 409, 509, 609, 709, 809, 909'
2,'2, 102, 202, 302, 402, 502, 602, 702, 802, 902'
6,'6, 106, 206, 306, 406, 506, 606, 706, 806, 906'
10,'10, 110, 210, 310, 410, 510, 610, 710, 810, 910'
7,'7, 107, 207, 307, 407, 507, 607, 707, 807, 907'
1,'1, 101, 201, 301, 401, 501, 601, 701, 801, 901'
---- TYPES
int, string
====
---- QUERY
# Test group_concat with NULL (default) delimiter
select day, group_concat(string_col, NULL)
from (select * from alltypesagg where id % 100 = day order by id limit 99999) a
group by day
---- RESULTS: VERIFY_IS_EQUAL_SORTED
3,'3, 103, 203, 303, 403, 503, 603, 703, 803, 903'
5,'5, 105, 205, 305, 405, 505, 605, 705, 805, 905'
8,'8, 108, 208, 308, 408, 508, 608, 708, 808, 908'
4,'4, 104, 204, 304, 404, 504, 604, 704, 804, 904'
9,'9, 109, 209, 309, 409, 509, 609, 709, 809, 909'
2,'2, 102, 202, 302, 402, 502, 602, 702, 802, 902'
6,'6, 106, 206, 306, 406, 506, 606, 706, 806, 906'
10,'10, 110, 210, 310, 410, 510, 610, 710, 810, 910'
7,'7, 107, 207, 307, 407, 507, 607, 707, 807, 907'
1,'1, 101, 201, 301, 401, 501, 601, 701, 801, 901'
---- TYPES
int, string
====
---- QUERY
# Test group_concat with both args as NULL
select day, group_concat(NULL, NULL)
from (select * from alltypesagg where id % 100 = day order by id limit 99999) a
group by day
---- RESULTS: VERIFY_IS_EQUAL_SORTED
3,'NULL'
5,'NULL'
8,'NULL'
4,'NULL'
9,'NULL'
2,'NULL'
6,'NULL'
10,'NULL'
7,'NULL'
1,'NULL'
---- TYPES
int, string
====
---- QUERY
# Test group_concat with arrow delimiter
select day, group_concat(string_col, "->")
from (select * from alltypesagg where id % 100 = day order by id limit 99999) a
group by day
---- RESULTS: VERIFY_IS_EQUAL_SORTED
3,'3->103->203->303->403->503->603->703->803->903'
5,'5->105->205->305->405->505->605->705->805->905'
8,'8->108->208->308->408->508->608->708->808->908'
4,'4->104->204->304->404->504->604->704->804->904'
9,'9->109->209->309->409->509->609->709->809->909'
2,'2->102->202->302->402->502->602->702->802->902'
6,'6->106->206->306->406->506->606->706->806->906'
10,'10->110->210->310->410->510->610->710->810->910'
7,'7->107->207->307->407->507->607->707->807->907'
1,'1->101->201->301->401->501->601->701->801->901'
---- TYPES
int, string
====
---- QUERY
# Test group_concat with column delimiter
# Will cause all columns save first to be duplicated
select day, group_concat(trim(string_col), trim(string_col))
from (select * from alltypesagg where id % 200 = day order by id limit 99999) a
group by day
---- RESULTS: VERIFY_IS_EQUAL_SORTED
3,'3203203403403603603803803'
5,'5205205405405605605805805'
8,'8208208408408608608808808'
4,'4204204404404604604804804'
9,'9209209409409609609809809'
2,'2202202402402602602802802'
6,'6206206406406606606806806'
10,'10210210410410610610810810'
7,'7207207407407607607807807'
1,'1201201401401601601801801'
---- TYPES
int, string
====
---- QUERY
# Test group_concat with multiple agg columns
select day, group_concat(string_col, '->'), group_concat(date_string_col)
from (select * from alltypesagg where id % 250 = day order by id limit 99999) a
group by day
---- RESULTS: VERIFY_IS_EQUAL_SORTED
3,'3->253->503->753','01/03/10, 01/03/10, 01/03/10, 01/03/10'
5,'5->255->505->755','01/05/10, 01/05/10, 01/05/10, 01/05/10'
8,'8->258->508->758','01/08/10, 01/08/10, 01/08/10, 01/08/10'
4,'4->254->504->754','01/04/10, 01/04/10, 01/04/10, 01/04/10'
9,'9->259->509->759','01/09/10, 01/09/10, 01/09/10, 01/09/10'
2,'2->252->502->752','01/02/10, 01/02/10, 01/02/10, 01/02/10'
6,'6->256->506->756','01/06/10, 01/06/10, 01/06/10, 01/06/10'
10,'10->260->510->760','01/10/10, 01/10/10, 01/10/10, 01/10/10'
7,'7->257->507->757','01/07/10, 01/07/10, 01/07/10, 01/07/10'
1,'1->251->501->751','01/01/10, 01/01/10, 01/01/10, 01/01/10'
---- TYPES
int, string, string
====
---- QUERY
# Test group_concat distinct with multiple agg columns
select day, group_concat(string_col, '->'), group_concat(date_string_col),
group_concat(distinct date_string_col)
from (select * from alltypesagg where id % 250 = day order by id limit 99999) a
group by day order by day
---- RESULTS: VERIFY_IS_EQUAL_SORTED
1,'1->251->501->751','01/01/10, 01/01/10, 01/01/10, 01/01/10','01/01/10'
2,'2->252->502->752','01/02/10, 01/02/10, 01/02/10, 01/02/10','01/02/10'
3,'3->253->503->753','01/03/10, 01/03/10, 01/03/10, 01/03/10','01/03/10'
4,'4->254->504->754','01/04/10, 01/04/10, 01/04/10, 01/04/10','01/04/10'
5,'5->255->505->755','01/05/10, 01/05/10, 01/05/10, 01/05/10','01/05/10'
6,'6->256->506->756','01/06/10, 01/06/10, 01/06/10, 01/06/10','01/06/10'
7,'7->257->507->757','01/07/10, 01/07/10, 01/07/10, 01/07/10','01/07/10'
8,'8->258->508->758','01/08/10, 01/08/10, 01/08/10, 01/08/10','01/08/10'
9,'9->259->509->759','01/09/10, 01/09/10, 01/09/10, 01/09/10','01/09/10'
10,'10->260->510->760','01/10/10, 01/10/10, 01/10/10, 01/10/10','01/10/10'
---- TYPES
int, string, string, string
====
---- QUERY
# Test group_concat with null result
select group_concat(string_col) from alltypesagg where string_col = NULL;
---- RESULTS
'NULL'
---- TYPES
string
====
---- QUERY
# Test group_concat distinct with null result
select group_concat(distinct string_col) from alltypesagg where string_col = NULL;
---- RESULTS
'NULL'
---- TYPES
string
====
---- QUERY
# Test group_concat with merge node
select group_concat(string_col) from alltypesagg where int_col = 1
---- RESULTS
'1, 1, 1, 1, 1, 1, 1, 1, 1, 1'
---- TYPES
STRING
====
---- QUERY
# Test merge phase uses correct separator (IMPALA-1110). The query needs to load data
# from multiple nodes in order to exercise this path, however the merge order is
# non-deterministic. So, aggregate a string literal to make the result deterministic.
select group_concat('abc', 'xy') from functional.alltypesagg where id % 1000 = day
---- RESULTS
'abcxyabcxyabcxyabcxyabcxyabcxyabcxyabcxyabcxyabc'
---- TYPES
string
====
---- QUERY
# Same as last query test, now adding the "distinct" clause
select group_concat(distinct 'abc', 'xy') from functional.alltypesagg
where id % 1000 = day
---- RESULTS
'abc'
---- TYPES
string
====
---- QUERY
# Test group_concat when separator varies by row.
select group_concat(cast(id as string), cast ((10 - id) as string))
from functional.alltypesagg
where id < 10 and day is not null
group by day
---- RESULTS
'0918273645546372819'
---- TYPES
string
====
---- QUERY
# Test correct removal of redundant group-by expressions (IMPALA-817)
select int_col * int_col, int_col + int_col
from functional.alltypesagg
group by int_col * int_col, int_col + int_col, int_col * int_col
having (int_col + int_col) < 5 order by 1 limit 10
---- RESULTS
1,2
4,4
---- TYPES
bigint,bigint
====
---- QUERY
# Test that binding predicates on an aggregation properly trigger materialization of
# slots in the agg tuple and the slots needed for evaluating the corresponding agg funcs
# (IMPALA-822).
select 1 from
(select count(bigint_col) c from functional.alltypesagg
having min(int_col) is not null) as t
where c is not null
---- RESULTS
1
---- TYPES
tinyint
====
---- QUERY
# Regression test for subexpr elimination in codegen. IMPALA-765
select count(tinyint_col), sum(tinyint_col * tinyint_col) from alltypesagg
---- RESULTS
9000,285000
---- TYPES
bigint,bigint
====
---- QUERY
# Regression test for subexpr elimination in codegen. IMPALA-765
select count(int_col), sum(int_col), avg(int_col) from alltypesagg where int_col is NULL
---- RESULTS
0,NULL,NULL
---- TYPES
bigint,bigint,double
====
---- QUERY
# Regression test for subexpr elimination in codegen. IMPALA-850
select id % 2, int_col > 1, id from alltypesagg where id < 2 group by 1,2,3
---- RESULTS
0,NULL,0
1,false,1
---- TYPES
int,boolean,int
====
---- QUERY
# Regression test for min/max of all negative values. IMPALA-869.
select min(cast(-1.0 as float)), max(cast(-1.0 as float)) from tinytable
---- RESULTS
-1,-1
---- TYPES
float,float
====
---- QUERY
# Regression test codegen with nulls and compound predicates. IMPALA-892.
select COUNT(int_col is not null AND bool_col) - COUNT(bool_col) FROM alltypesagg
---- RESULTS
0
---- TYPES
BIGINT
====
---- QUERY
select histogram(bool_col) from functional.alltypestiny;
---- RESULTS
'0, 0, 0, 0, 1, 1, 1, 1'
---- TYPES
STRING
====
---- QUERY
select histogram(tinyint_col) from functional.alltypestiny;
---- RESULTS
'0, 0, 0, 0, 1, 1, 1, 1'
---- TYPES
STRING
====
---- QUERY
select histogram(smallint_col) from functional.alltypestiny;
---- RESULTS
'0, 0, 0, 0, 1, 1, 1, 1'
---- TYPES
STRING
====
---- QUERY
select histogram(int_col) from functional.alltypestiny;
---- RESULTS
'0, 0, 0, 0, 1, 1, 1, 1'
---- TYPES
STRING
====
---- QUERY
select histogram(bigint_col) from functional.alltypestiny;
---- RESULTS
'0, 0, 0, 0, 10, 10, 10, 10'
---- TYPES
STRING
====
---- QUERY
select histogram(float_col) from functional.alltypestiny;
---- RESULTS
'0, 0, 0, 0, 1.1, 1.1, 1.1, 1.1'
---- TYPES
STRING
====
---- QUERY
select histogram(double_col) from functional.alltypestiny;
---- RESULTS
'0, 0, 0, 0, 10.1, 10.1, 10.1, 10.1'
---- TYPES
STRING
====
---- QUERY
select histogram(string_col) from functional.alltypestiny;
---- RESULTS
'0, 0, 0, 0, 1, 1, 1, 1'
---- TYPES
STRING
====
---- QUERY
select histogram(timestamp_col) from functional.alltypestiny;
---- RESULTS
'2009-01-01 00:00:00, 2009-01-01 00:01:00, 2009-02-01 00:00:00, 2009-02-01 00:01:00, 2009-03-01 00:00:00, 2009-03-01 00:01:00, 2009-04-01 00:00:00, 2009-04-01 00:01:00'
---- TYPES
STRING
====
---- QUERY
# IMPALA-4787: appx_median() on a medium sized dataset. This should excercise merge() with
# differently sized inputs in the Reservoir Sampling algorithm.
select
appx_median(bool_col),
appx_median(tinyint_col),
appx_median(smallint_col),
appx_median(int_col),
appx_median(float_col),
appx_median(double_col),
appx_median(string_col),
appx_median(timestamp_col)
from alltypes
---- RESULTS
true,5,5,5,5.5,50.5,'5',2010-01-01 00:00:00
---- TYPES
BOOLEAN, TINYINT, SMALLINT, INT, FLOAT, DOUBLE, STRING, TIMESTAMP
====
---- QUERY
# IMPALA-4787: appx_median on a large dataset. This requires several buffer resizes in the
# Reservoir Sampling algorithm.
select appx_median(l_returnflag)
from tpch.lineitem
where l_returnflag = "N"
---- RESULTS
'N'
---- TYPES
STRING
====
---- QUERY
# IMPALA-1419: Agg fn containing arithmetic expr on NULL fails
select count(null * 1) from functional.alltypes
---- RESULTS
0
---- TYPES
BIGINT
====
---- QUERY
# IMPALA-1898: ordinal in group/order by combined with explicit select-list alias
# that match columns in underlying table
select extract(timestamp_col, 'year') as timestamp_col,
extract(timestamp_col, 'month') as month,
sum(tinyint_col)
from functional.alltypes
group by 1, 2
order by 1, 2;
---- RESULTS
2009,1,1395
2009,2,1260
2009,3,1395
2009,4,1350
2009,5,1395
2009,6,1350
2009,7,1395
2009,8,1395
2009,9,1350
2009,10,1395
2009,11,1350
2009,12,1395
2010,1,1395
2010,2,1260
2010,3,1395
2010,4,1350
2010,5,1395
2010,6,1350
2010,7,1395
2010,8,1395
2010,9,1350
2010,10,1395
2010,11,1350
2010,12,1395
---- TYPES
BIGINT,BIGINT,BIGINT
====
---- QUERY
# IMPALA-2089: Tests correct elimination of redundant predicates.
# The equivalences between inline-view slots are enforced inside the inline-view plan.
# Equivalences between simple grouping slots (with SlotRef grouping exprs) are enforced
# at the scan, and equivalences between grouping slots with complex grouping exprs are
# enforced at the aggregation.
select t2.timestamp_col, t1.int_col_1
from
(select coalesce(t1.smallint_col, t1.month, t1.month) as int_col,
(count(t1.int_col)) <= (coalesce(t1.smallint_col, t1.month, t1.month)) as boolean_col,
(t1.bigint_col) + (t1.smallint_col) as int_col_1
from functional.alltypes t1
group by coalesce(t1.smallint_col, t1.month, t1.month), (t1.bigint_col) + (t1.smallint_col)
having (t1.bigint_col) + (t1.smallint_col) != (count(t1.bigint_col + t1.smallint_col))
) t1
inner join functional.alltypes t2
on (t2.month = t1.int_col and t2.month = t1.int_col_1 and t2.tinyint_col = t1.int_col)
where t2.int_col IN (t1.int_col_1, t1.int_col)
---- RESULTS
---- TYPES
TIMESTAMP,BIGINT
====
---- QUERY
# IMPALA-5036: Tests the correctness of the Parquet count(*) optimization.
select count(1)
from functional_parquet.alltypes
---- RESULTS
7300
---- TYPES
bigint
=====
---- QUERY
# IMPALA-5036: Parquet count(*) optimization with predicates on the partition columns.
select count(1)
from functional_parquet.alltypes where year < 2010 and month > 8
---- RESULTS
1220
---- TYPES
bigint
=====
---- QUERY
# IMPALA-5036: Parquet count(*) optimization with group by partition columns.
select year, month, count(1)
from functional_parquet.alltypes where month > 10 group by year, month
---- RESULTS
2009,11,300
2009,12,310
2010,11,300
2010,12,310
---- TYPES
int, int, bigint
=====
---- QUERY
# IMPALA-5036: Parquet count(*) optimization with both group by and predicates on
# partition columns.
select count(1)
from functional_parquet.alltypes where year < 2010 and month > 8
group by month
---- RESULTS
310
300
310
300
---- TYPES
bigint
=====
---- QUERY
# IMPALA-5036: Parquet count(*) optimization with the result of the going into a join.
select x.bigint_col from functional.alltypes x
inner join (
select count(1) as a from functional_parquet.alltypes group by year
) t on x.id = t.a;
---- RESULTS
0
0
---- TYPES
bigint
=====
---- QUERY
# IMPALA-5036: Parquet count(*) optimization with the agg function in the having clause.
select 1 from functional_parquet.alltypes having count(*) > 1
---- RESULTS
1
---- TYPES
tinyint
====
---- QUERY
# IMPALA-5855: pre-aggregation does not reserve enough memory with 2MB buffers.
# The pre-aggregation in this query is estimated to consume enough memory for the planner
# to use 2MB buffers.
set debug_action="-1:PREPARE:SET_DENY_RESERVATION_PROBABILITY@1.0";
select count(*) from (
select distinct l_orderkey, l_comment from tpch_parquet.lineitem) v
---- RESULTS
6001198
---- TYPES
bigint
====
---- QUERY
# IMPALA-6295: min/max where nan/inf/-inf are the only values
with x as (select cast('nan' as float) a, cast('inf' as float) b, cast('-inf' as float) c)
select min(a), min(b), min(c), max(a), max(b), max(c) from x
---- RESULTS
NaN,Infinity,-Infinity,NaN,Infinity,-Infinity
---- TYPES
FLOAT,FLOAT,FLOAT,FLOAT,FLOAT,FLOAT
====
---- QUERY
# IMPALA-6295: min/max/sum/avg should return nan if any of the values is nan
with x as (values (0), (1), (cast('nan' as double)), (cast('inf' as double)),
(cast('-inf' as double)))
select min(`0`), max(`0`), sum(`0`), avg(`0`) from x
---- RESULTS
NaN,NaN,NaN,NaN
---- TYPES
DOUBLE,DOUBLE,DOUBLE,DOUBLE
====
---- QUERY
# Test behavior of inf
with x as (values (0), (cast('inf' as double)), (5.2))
select min(`0`), max(`0`), sum(`0`), avg(`0`) from x
---- RESULTS
0,Infinity,Infinity,Infinity
---- TYPES
DOUBLE,DOUBLE,DOUBLE,DOUBLE
====
---- QUERY
# Test behavior of -inf
with x as (values (cast('-inf' as double)), (0), (-10))
select min(`cast('-inf' as double)`), max(`cast('-inf' as double)`),
sum(`cast('-inf' as double)`), avg(`cast('-inf' as double)`)
from x
---- RESULTS
-Infinity,0,-Infinity,-Infinity
---- TYPES
DOUBLE,DOUBLE,DOUBLE,DOUBLE
====
---- QUERY
# IMPALA-7397: conjunct that makes allocations in Prepare (extract) assigned to agg node
select timestamp_col, count(int_col) from alltypesagg group by timestamp_col, int_col
having extract(hour from timestamp_col) = int_col
---- TYPES
TIMESTAMP,BIGINT
---- RESULTS
====
---- QUERY
# GROUP BY of NaN values aggregates NaN's as one grouping
select count(*), sqrt(0.5-x) as Z
from (VALUES((1.6 x, 2 y), (3.2, 4), (5.4,6))) T
group by Z
---- RESULTS
3, NaN
---- TYPES
bigint, double
====
---- QUERY
# GROUP BY of NaN values aggregates NaN's as one grouping
select count(*), cast(sqrt(0.4-x) as FLOAT) as P, cast(sqrt(1.5-y) as FLOAT) as Q
from (VALUES((1.6 x, 1.6 y, 0 z), (0.5, 0.5, 0), (5.4, 6, 0),
(0.5, 0.5, 0), (0.5, 0.5, 0), (-0.6, 0.5, 0))) T
group by P, Q order by P, Q
---- RESULTS
2, NaN, NaN
3, NaN, 1
1, 1, 1
---- TYPES
bigint, float, float
====
---- QUERY
# IMPALA-6660: GROUP BY of -0/+0 values aggregates zeros as one grouping
select x, count(*)
from (values(cast("-0" as float) x), (cast("0" as float))) v
group by x
---- RESULTS
0,2
---- TYPES
float, bigint
====
---- QUERY
# IMPALA-6660: -0/+0 values are not distinct
select distinct *
from (values(cast("-0" as float)), (cast("0" as float))) v;
---- RESULTS
0
---- TYPES
float
====
---- QUERY
# IMPALA-8140: Test that group by with limit does not crash on Asan
select count(*) from tpch_parquet.orders o group by o.o_clerk limit 10
# We don't validate the results since the order is not deterministic.
---- TYPES
bigint
====
---- QUERY
# CORR() function examples
select corr(ps_availqty, ps_supplycost) from tpch.partsupp;
---- RESULTS
0.000321849166315
---- TYPES
double
====
---- QUERY
# Behavior of CORR() on null table
select corr(d, e) from functional.nulltable;
---- RESULTS
NULL
---- TYPES
double
====
---- QUERY
# Behavior of CORR() on empty table
select corr(f2, f2) from functional.emptytable;
---- RESULTS
NULL
---- TYPES
double
====
---- QUERY
# CORR() on different datatypes
select corr(tinyint_col, tinyint_col), corr(smallint_col, smallint_col),
corr(int_col, int_col), corr(bigint_col, bigint_col), corr(float_col, float_col),
corr(double_col, double_col), corr(timestamp_col, timestamp_col) from functional.alltypes;
---- RESULTS
1.0,1.0,1.0,1.0,1.0,1.0,1.0
---- TYPES
double,double,double,double,double,double,double
====
---- QUERY
# CORR() on timestamp columns
select corr(utctime, localtime) from functional.alltimezones;
---- RESULTS
0.999995807708
---- TYPES
double
====
---- QUERY
# Since group by id will result in a single row, this test shows that corr() returns null in case of a single row.
select id, corr(int_col, int_col) from functional.alltypestiny group by id;
---- RESULTS
2,NULL
4,NULL
0,NULL
6,NULL
1,NULL
7,NULL
3,NULL
5,NULL
---- TYPES
int,double
====
---- QUERY
# CORR() on decimal datatype
select corr(d3, d4) from functional.decimal_tbl;
---- RESULTS
NULL
---- TYPES
double
====
---- QUERY
select year, corr(double_col, double_col) from functional.alltypes group by year;
---- RESULTS
2009,1.0
2010,1.0
---- TYPES
int,double
====
---- QUERY
select corr(double_col, -double_col) from functional.alltypes;
---- RESULTS
-1.0
---- TYPES
double
====
---- QUERY
select corr(double_col, double_col) from functional.alltypes;
---- RESULTS
1.0
---- TYPES
double
====
---- QUERY
select corr(ss_sold_time_sk, ss_quantity) from tpcds.store_sales;
---- RESULTS
0.000136790587885
---- TYPES
double
====
---- QUERY
select s_store_sk, corr(s_number_employees, s_floor_space) over (partition by s_city order by s_store_sk)
from tpcds.store;
---- RESULTS
5,NULL
8,1.0
12,-0.328248435346
1,NULL
2,-1.0
3,-0.511481796057
4,-0.881269090479
6,-0.853760432996
7,0.0825005896565
9,0.0282032912339
10,0.223462144551
11,0.325760965741
---- TYPES
int,double
====
---- QUERY
select id, double_col, corr(double_col, int_col) over (partition by month order by id) from functional.alltypes
order by id limit 10;
---- RESULTS
0,0.0,NULL
1,10.1,1.0
2,20.2,1.0
3,30.3,1.0
4,40.4,1.0
5,50.5,1.0
6,60.6,1.0
7,70.7,1.0
8,80.8,1.0
9,90.9,1.0
---- TYPES
int,double,double
====
---- QUERY
# CORR() when one column is filled with null
select corr(null_int, rand()), corr(rand(), null_int) from functional.nullrows;
---- RESULTS
NULL,NULL
---- TYPES
double,double
====
---- QUERY
# CORR() supporting join
select corr(A.double_col, B.double_col) from functional.alltypes A, functional.alltypes B where A.id=B.id;
---- RESULTS
1.0
---- TYPES
double
====
---- QUERY
# Tests functioning of CorrRemoveState()
select s_store_sk, corr(s_number_employees, s_floor_space) over (partition by s_city order by s_store_sk
rows between 5 preceding and 2 following) from tpcds.store;
---- RESULTS
5,-0.328248435346
8,-0.328248435346
12,-0.328248435346
1,-0.511481796057
2,-0.881269090479
3,-0.853760432996
4,0.0825005896565
6,0.0282032912339
7,0.223462144551
9,0.294637370885
10,0.115190500757
11,-0.0690212865931
---- TYPES
int,double
====
---- QUERY
# Mathematical operations on double can lead to variance becoming negative by a very small amount (around +1e-13),
# to avoid that a check is added (state->xvar <= 0.0 || state->yvar <= 0.0), without which the below test will
# result in nan for certain cases.
select s_store_sk, corr(s_number_employees, s_floor_space) over (partition by s_city order by s_store_sk
rows between 1 preceding and 1 following) from tpcds.store;
---- RESULTS
5,1.0
8,-0.328248435346
12,-1.0
1,-1.0
2,-0.511481796057
3,-0.829250636825
4,-0.884750027905
6,-0.735742100541
7,-0.165224541153
9,0.97066733931
10,1.0
11,NULL
---- TYPES
int,double
====
---- QUERY
# Corr() when window size is 2
select s_store_sk, corr(s_number_employees, s_floor_space) over (partition by s_city order by s_store_sk
rows between 1 preceding and current row) from tpcds.store;
---- RESULTS
5,NULL
8,1.0
12,-1.0
1,NULL
2,-1.0
3,NULL
4,-1.0
6,-1.0
7,-1.0
9,1.0
10,1.0
11,NULL
---- TYPES
int,double
====
---- QUERY
select s_store_sk, corr(s_number_employees, s_floor_space) over (partition by s_city order by s_store_sk
rows between current row and 1 following) from tpcds.store;
---- RESULTS
5,1.0
8,-1.0
12,NULL
1,-1.0
2,NULL
3,-1.0
4,-1.0
6,-1.0
7,1.0
9,1.0
10,NULL
11,NULL
---- TYPES
int,double
====
---- QUERY
select s_store_sk, corr(s_number_employees, s_floor_space) over (partition by s_city order by s_store_sk
rows between current row and unbounded following) from tpcds.store;
---- RESULTS
12,NULL
8,-1.0
5,-0.328248435346
11,NULL
10,NULL
9,1.0
7,0.965705311825
6,0.17735847272
4,-0.0690212865931
3,0.115190500757
2,0.294637370885
1,0.325760965741
---- TYPES
int,double
====
---- QUERY
select s_store_sk, corr(s_number_employees, s_floor_space) over (partition by s_city order by s_store_sk
rows between unbounded preceding and current row) from tpcds.store;
---- RESULTS
5,NULL
8,1.0
12,-0.328248435346
1,NULL
2,-1.0
3,-0.511481796057
4,-0.881269090479
6,-0.853760432996
7,0.0825005896565
9,0.0282032912339
10,0.223462144551
11,0.325760965741
---- TYPES
int,double
====
---- QUERY
# Sample and population covariance
select covar_samp(ps_availqty, ps_supplycost), covar_pop(ps_availqty, ps_supplycost) from tpch.partsupp;
---- RESULTS
267.607127282,267.606792773
---- TYPES
double,double
====
---- QUERY
# covar_samp() and covar_pop() on null table
select covar_samp(d, e), covar_pop(d, e) from functional.nulltable;
---- RESULTS
NULL,NULL
---- TYPES
double,double
====
---- QUERY
# covar_samp() on different datatypes
select covar_samp(tinyint_col, tinyint_col), covar_samp(smallint_col, smallint_col),
covar_samp(int_col, int_col), covar_samp(bigint_col, bigint_col), covar_samp(float_col, float_col),
covar_samp(double_col, double_col) from functional.alltypes;
---- RESULTS
8.251130291820797,8.251130291820797,8.251130291820797,825.1130291820797,9.983867246543589,841.6978010686292
---- TYPES
double,double,double,double,double,double
====
---- QUERY
# covar_pop() on different datatypes
select covar_pop(tinyint_col, tinyint_col), covar_pop(smallint_col, smallint_col),
covar_pop(int_col, int_col), covar_pop(bigint_col, bigint_col), covar_pop(float_col, float_col),
covar_pop(double_col, double_col) from functional.alltypes;
---- RESULTS
8.25,8.25,8.25,825.0,9.9824995935,841.5825
---- TYPES
double,double,double,double,double,double
====
---- QUERY
# Behavior of covar_samp() and covar_pop on empty table
select covar_samp(f2, f2), covar_pop(f2, f2) from functional.emptytable;
---- RESULTS
NULL,NULL
---- TYPES
double,double
====
---- QUERY
# covar_samp() and covar_pop() on timestamp datatype. The expression is divided by a large number like 1E+10
# because unlike overfow in int, overflow in double looses precision
select covar_samp(utctime, localtime)/1E+10, covar_pop(utctime, localtime)/1E+10 from functional.alltimezones;
---- RESULTS
5502.915940016994,5496.494801230509
---- TYPES
double,double
====
---- QUERY
# Since group by id will result in a single row, this test shows
# behavior of covar_samp() and covar_pop() on a single row
select id, covar_samp(int_col, int_col), covar_pop(int_col, int_col) from functional.alltypestiny group by id;
---- RESULTS
2,NULL,0.0
4,NULL,0.0
0,NULL,0.0
6,NULL,0.0
1,NULL,0.0
7,NULL,0.0
3,NULL,0.0
5,NULL,0.0
---- TYPES
int,double,double
====
---- QUERY
# covar_samp() and covar_pop() on decimal type
select covar_samp(d3, d4), covar_pop(d3, d4) from functional.decimal_tbl;
---- RESULTS
0.0,0.0
---- TYPES
double,double
====
---- QUERY
# covar_samp() and covar_pop() with group by clause
select year, covar_samp(double_col, double_col),
covar_pop(double_col, double_col) from functional.alltypes group by year;
---- RESULTS
2009,841.813133735,841.5825
2010,841.813133735,841.5825
---- TYPES
int,double,double
====
---- QUERY
# Example of negative covar_samp() and covar_pop()
select covar_samp(double_col, -double_col), covar_pop(double_col, -double_col) from functional.alltypes;
---- RESULTS
-841.697801069,-841.5825
---- TYPES
double,double
====
---- QUERY
select covar_samp(double_col, double_col), covar_pop(double_col, double_col) from functional.alltypes;
---- RESULTS
841.697801069,841.5825
---- TYPES
double,double
====
---- QUERY
select covar_samp(ss_sold_time_sk, ss_quantity), covar_pop(ss_sold_time_sk, ss_quantity) from tpcds.store_sales;
---- RESULTS
50.2400502845,50.2400315775
---- TYPES
double,double
====
---- QUERY
# covar_samp() on analytic query
select s_store_sk, covar_samp(s_number_employees, s_floor_space) over (partition by s_city order by s_store_sk)
from tpcds.store;
---- RESULTS
5,NULL
8,10414050.0
12,-5125047.666666668
1,NULL
2,-158355.0
3,-3513583.5
4,-19719420.0
6,-16848393.6
7,4309087.100000003
9,1337692.761904765
10,11663050.25
11,17362201.83333334
---- TYPES
int,double
====
---- QUERY
# covar_pop() on analytic query
select s_store_sk, covar_pop(s_number_employees, s_floor_space) over (partition by s_city order by s_store_sk)
from tpcds.store;
---- RESULTS
5,0.0
8,5207025.0
12,-3416698.444444445
1,0.0
2,-79177.5
3,-2342389.0
4,-14789565.0
6,-13478714.88
7,3590905.916666669
9,1146593.79591837
10,10205168.96875
11,15433068.2962963
---- TYPES
int,double
====
---- QUERY
select id, double_col, covar_samp(double_col, int_col) over (partition by month order by id) from
functional.alltypes order by id limit 10;
---- RESULTS
0,0.0,NULL
1,10.1,5.05
2,20.2,10.1
3,30.3,16.8333333333
4,40.4,25.25
5,50.5,35.35
6,60.6,47.1333333333
7,70.7,60.6
8,80.8,75.75
9,90.9,92.5833333333
---- TYPES
int,double,double
====
---- QUERY
select id, double_col, covar_pop(double_col, int_col) over (partition by month order by id) from
functional.alltypes order by id limit 10;
---- RESULTS
0,0.0,0.0
1,10.1,2.525
2,20.2,6.73333333333
3,30.3,12.625
4,40.4,20.2
5,50.5,29.4583333333
6,60.6,40.4
7,70.7,53.025
8,80.8,67.3333333333
9,90.9,83.325
---- TYPES
int,double,double
====
---- QUERY
# covar_samp() and covar_pop() when one column is filled with null
select covar_samp(null_int, rand()), covar_samp(rand(), null_int), covar_pop(null_int, rand()),
covar_pop(rand(), null_int) from functional.nullrows;
---- RESULTS
NULL,NULL,NULL,NULL
---- TYPES
double,double,double,double
====
---- QUERY
# covar_samp() and covar_pop() supporting join
select covar_samp(A.double_col, B.double_col), covar_pop(A.double_col, B.double_col) from functional.alltypes A,
functional.alltypes B where A.id=B.id;
---- RESULTS
841.697801069,841.5825
---- TYPES
double,double
====
---- QUERY
# covar_samp() and covar_pop() supporting timestamp datatype
select covar_samp(timestamp_col, timestamp_col)/1E+13, covar_pop(timestamp_col, timestamp_col)/1E+13
from functional.alltypes;
---- RESULTS
33.1559162368,33.1513743305
---- TYPES
double,double
====
---- QUERY
# corr(), covar_samp(), covar_pop() when both columns are filled with 0
select corr(double_col*0, double_col*0), covar_samp(double_col*0, double_col*0),
covar_pop(double_col*0, double_col*0) from functional.alltypes;
---- RESULTS
NULL,0.0,0.0
---- TYPES
double,double,double
====
---- QUERY
# corr(), covar_samp(), covar_pop() when one column is filled with 0
select corr(double_col, double_col*0), covar_samp(double_col, double_col*0),
covar_pop(double_col, double_col*0) from functional.alltypes;
---- RESULTS
NULL,0.0,0.0
---- TYPES
double,double,double
====
---- QUERY
select s_store_sk, covar_samp(s_number_employees, s_floor_space) over (partition by s_city order by s_store_sk
rows between unbounded preceding and current row) from tpcds.store;
---- RESULTS
5,NULL
8,10414050.0
12,-5125047.666666668
1,NULL
2,-158355.0
3,-3513583.5
4,-19719420.0
6,-16848393.6
7,4309087.100000003
9,1337692.761904765
10,11663050.25
11,17362201.83333334
---- TYPES
int,double
====
---- QUERY
select s_store_sk, covar_pop(s_number_employees, s_floor_space) over (partition by s_city order by s_store_sk
rows between unbounded preceding and current row) from tpcds.store;
---- RESULTS
5,0.0
8,5207025.0
12,-3416698.444444445
1,0.0
2,-79177.5
3,-2342389.0
4,-14789565.0
6,-13478714.88
7,3590905.916666669
9,1146593.79591837
10,10205168.96875
11,15433068.2962963
---- TYPES
int,double
====
---- QUERY
# Tests functioning of CovarRemoveState()
select s_store_sk, covar_samp(s_number_employees, s_floor_space) over (partition by s_city order by s_store_sk
rows between 5 preceding and 2 following) from tpcds.store;
---- RESULTS
5,-5125047.666666668
8,-5125047.666666668
12,-5125047.666666668
1,-3513583.5
2,-19719420.0
3,-16848393.6
4,4309087.100000003
6,1337692.761904765
7,11663050.25
9,14424596.67857143
10,3770362.571428577
11,-2206709.166666666
---- TYPES
int,double
====
---- QUERY
# Tests functioning of CovarRemoveState()
select s_store_sk, covar_pop(s_number_employees, s_floor_space) over (partition by s_city order by s_store_sk
rows between 5 preceding and 2 following) from tpcds.store;
---- RESULTS
5,-3416698.444444445
8,-3416698.444444445
12,-3416698.444444445
1,-2342389.0
2,-14789565.0
3,-13478714.88
4,3590905.916666669
6,1146593.79591837
7,10205168.96875
9,12621522.09375001
10,3231739.346938781
11,-1838924.305555555
---- TYPES
int,double
====
---- QUERY
# Covar_samp() when window size is 2
select s_store_sk, covar_samp(s_number_employees, s_floor_space) over (partition by s_city order by s_store_sk
rows between 1 preceding and current row) from tpcds.store;
---- RESULTS
5,NULL
8,10414050.0
12,-14211464
1,NULL
2,-158355.0
3,0
4,-16051572
6,-1733847.5
7,-2425526
9,25465544
10,26428357
11,0
---- TYPES
int,double
====
---- QUERY
# Covar_pop() when window size is 2
select s_store_sk, covar_pop(s_number_employees, s_floor_space) over (partition by s_city order by s_store_sk
rows between 1 preceding and current row) from tpcds.store;
---- RESULTS
5,0.0
8,5207025.0
12,-7105732
1,0.0
2,-79177.5
3,0
4,-8025786
6,-866923.75
7,-1212763
9,12732772
10,13214178.5
11,0
---- TYPES
int,double
====
---- QUERY
# regression function examples
select regr_slope(ps_availqty, ps_supplycost),
regr_intercept(ps_availqty, ps_supplycost), regr_r2(ps_availqty, ps_supplycost)
from tpch.partsupp;
---- RESULTS
0.003223046670647307,5001.613715742804,1.035868858574101e-07
---- TYPES
double, double, double
====
---- QUERY
# Behavior of regression functions on null table
select regr_slope(d, e), regr_intercept(d, e), regr_r2(d, e) from functional.nulltable;
---- RESULTS
NULL,NULL,NULL
---- TYPES
double, double, double
====
---- QUERY
# Behavior of regression functions on empty table
select regr_slope(f2, f2), regr_intercept(f2, f2), regr_r2(f2, f2) from functional.emptytable;
---- RESULTS
NULL,NULL,NULL
---- TYPES
double, double, double
====
---- QUERY
# regr_slope() on different datatypes
select regr_slope(tinyint_col, tinyint_col), regr_slope(smallint_col, smallint_col),
regr_slope(int_col, int_col), regr_slope(bigint_col, bigint_col), regr_slope(float_col, float_col),
regr_slope(double_col, double_col), regr_slope(timestamp_col, timestamp_col) from functional.alltypes;
---- RESULTS
1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0
---- TYPES
double, double, double, double, double, double, double
====
---- QUERY
# regr_intercept() on different datatypes
select regr_intercept(tinyint_col, tinyint_col), regr_intercept(smallint_col, smallint_col),
regr_intercept(int_col, int_col), regr_intercept(bigint_col, bigint_col), regr_intercept(float_col, float_col),
regr_intercept(double_col, double_col), round(regr_intercept(timestamp_col, timestamp_col), 5) from
functional.alltypes;
---- RESULTS
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0
---- TYPES
double, double, double, double, double, double, double
====
---- QUERY
# regr_r2() on different datatypes
select regr_r2(tinyint_col, tinyint_col), regr_r2(smallint_col, smallint_col),
regr_r2(int_col, int_col), regr_r2(bigint_col, bigint_col), regr_r2(float_col, float_col),
regr_r2(double_col, double_col), regr_r2(timestamp_col, timestamp_col) from functional.alltypes;
---- RESULTS
1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0
---- TYPES
double, double, double, double, double, double, double
====
---- QUERY
# regr_slope(), regr_intercept(), regr_r2() on timestamp columns
select regr_slope(utctime, localtime), regr_intercept(utctime, localtime),
regr_r2(utctime, localtime) from functional.alltimezones;
---- RESULTS
0.9995840247725529, 593958.8067555428, 0.9999916154327646
---- TYPES
double, double, double
====
---- QUERY
# Since group by id will result in a single row, this test shows that regr_slope() returns null in case of a single row.
select id, regr_slope(int_col, int_col) from functional.alltypestiny group by id;
---- RESULTS
2,NULL
4,NULL
0,NULL
6,NULL
1,NULL
7,NULL
3,NULL
5,NULL
---- TYPES
int,double
====
---- QUERY
# Since group by id will result in a single row, this test shows that regr_intercept() returns null in
# case of a single row.
select id, regr_intercept(int_col, int_col) from functional.alltypestiny group by id;
---- RESULTS
2,NULL
4,NULL
0,NULL
6,NULL
1,NULL
7,NULL
3,NULL
5,NULL
---- TYPES
int,double
====
---- QUERY
# Since group by id will result in a single row, this test shows that regr_r2() returns null in case of a
# single row.
select id, regr_r2(int_col, int_col) from functional.alltypestiny group by id;
---- RESULTS
2,NULL
4,NULL
0,NULL
6,NULL
1,NULL
7,NULL
3,NULL
5,NULL
---- TYPES
int,double
====
---- QUERY
# regr_slope(), regr_intercept(), regr_r2() on decimal datatype
select regr_slope(d3, d4), regr_intercept(d3, d4), regr_r2(d3, d4) from functional.decimal_tbl;
---- RESULTS
NULL,NULL,NULL
---- TYPES
double, double, double
====
---- QUERY
select year, regr_slope(double_col, double_col) from functional.alltypes group by year;
---- RESULTS
2009,1.0
2010,1.0
---- TYPES
int,double
====
---- QUERY
select year, regr_intercept(double_col, double_col) from functional.alltypes group by year;
---- RESULTS
2009,0.0
2010,0.0
---- TYPES
int,double
====
---- QUERY
select year, regr_r2(double_col, double_col) from functional.alltypes group by year;
---- RESULTS
2009,1.0
2010,1.0
---- TYPES
int,double
====
---- QUERY
select regr_slope(double_col, -double_col), regr_intercept(double_col, -double_col),
regr_r2(double_col, -double_col) from functional.alltypes;
---- RESULTS
-1.0, 0.0, 1.0
---- TYPES
double, double, double
====
---- QUERY
select regr_slope(double_col, double_col), regr_intercept(double_col, double_col),
regr_r2(double_col, double_col) from functional.alltypes;
---- RESULTS
1.0, 0.0, 1.0
---- TYPES
double, double, double
====
---- QUERY
select regr_slope(ss_sold_time_sk, ss_quantity), regr_intercept(ss_sold_time_sk, ss_quantity)/10000,
regr_r2(ss_sold_time_sk, ss_quantity) from tpcds.store_sales;
---- RESULTS
0.0602719636627,5.1709905412,1.87116649337e-08
---- TYPES
double, double, double
====
---- QUERY
select s_store_sk, regr_slope(s_number_employees, s_floor_space) over (partition by s_city order by s_store_sk)
from tpcds.store;
---- RESULTS
5,NULL
8,4.80120606296e-06
12,-1.37354292706e-06
1,NULL
2,-0.000255754475703
3,-2.01036006341e-06
4,-5.05103424244e-06
6,-4.3565531677e-06
7,1.21229193717e-06
9,4.44553019714e-07
10,3.9044206462e-06
11,6.00483790103e-06
---- TYPES
int, double
====
---- QUERY
select s_store_sk, regr_intercept(s_number_employees, s_floor_space) over (partition by s_city order by s_store_sk)
from tpcds.store;
---- RESULTS
5,NULL
8, 244.41078639
12, 296.416240104
1,NULL
2, 1587.90537084399
3, 251.125599973
4, 268.395215604
6, 264.570040249
7, 234.323507488
9, 244.099933952
10, 223.133569068
11, 210.405303328
---- TYPES
int, double
====
---- QUERY
select s_store_sk, regr_r2(s_number_employees, s_floor_space) over (partition by s_city order by s_store_sk)
from tpcds.store;
---- RESULTS
5,NULL
8, 1.0
12, 0.107747035307
1,NULL
2, 1.0
3, 0.261613627698
4, 0.776635209834
6, 0.728906876949
7, 0.00680634729367
9, 0.000795425636424
10, 0.0499353300472
11, 0.1061202068
---- TYPES
int, double
====
---- QUERY
select id, double_col, regr_slope(double_col, int_col) over (partition by month order by id) from functional.alltypes
order by id limit 10;
---- RESULTS
0,0.0,NULL
1,10.1,10.1
2,20.2,10.1
3,30.3,10.1
4,40.4,10.1
5,50.5,10.1
6,60.6,10.1
7,70.7,10.1
8,80.8,10.1
9,90.9,10.1
---- TYPES
int,double,double
====
---- QUERY
select id, double_col, regr_intercept(double_col, int_col) over (partition by month order by id) from functional.alltypes
order by id limit 10;
---- RESULTS
0, 0.0,NULL
1, 10.1, 0.0
2, 20.2, 0.0
3, 30.3, 1.7763568394e-15
4, 40.4, 0.0
5, 50.5, 0.0
6, 60.6, 3.5527136788e-15
7, 70.7, -7.1054273576e-15
8, 80.8, -7.1054273576e-15
9, 90.9, -7.1054273576e-15
---- TYPES
int,double,double
====
---- QUERY
select id, double_col, regr_r2(double_col, int_col) over (partition by month order by id) from functional.alltypes
order by id limit 10;
---- RESULTS
0,0.0,NULL
1,10.1,1.0
2,20.2,1.0
3,30.3,1.0
4,40.4,1.0
5,50.5,1.0
6,60.6,1.0
7,70.7,1.0
8,80.8,1.0
9,90.9,1.0
---- TYPES
int,double,double
====
---- QUERY
# Regression functions when one column is filled with null
select regr_slope(null_int, rand()), regr_slope(rand(), null_int), regr_intercept(null_int, rand()),
regr_intercept(rand(), null_int), regr_r2(null_int, rand()), regr_r2(rand(), null_int)
from functional.nullrows;
---- RESULTS
NULL,NULL,NULL,NULL,NULL,NULL
---- TYPES
double, double, double, double, double, double
====
---- QUERY
# Regression functions supporting join
select regr_slope(A.double_col, B.double_col), regr_intercept(A.double_col, B.double_col),
regr_r2(A.double_col, B.double_col) from functional.alltypes A, functional.alltypes B where A.id=B.id;
---- RESULTS
1.0, 0.0, 1.0
---- TYPES
double, double, double
====
---- QUERY
# Tests functioning of RegrSlopeRemoveState()
select s_store_sk, regr_slope(s_number_employees, s_floor_space) over (partition by s_city order by s_store_sk
rows between 5 preceding and 2 following) from tpcds.store;
---- RESULTS
5,-1.37354292706e-06
8,-1.37354292706e-06
12,-1.37354292706e-06
1,-2.01036006341e-06
2,-5.05103424244e-06
3,-4.3565531677e-06
4,1.21229193717e-06
6,4.44553019714e-07
7,3.9044206462e-06
9,6.59936024162e-06
10,4.13595759719e-06
11,-2.68509660433e-06
---- TYPES
int,double
====
---- QUERY
# Tests functioning of RegrSlopeRemoveState() for regr_intercept()
select s_store_sk, regr_intercept(s_number_employees, s_floor_space) over (partition by s_city order by s_store_sk
rows between 5 preceding and 2 following) from tpcds.store;
---- RESULTS
5, 296.416240104
8, 296.416240104
12, 296.416240104
1, 251.125599973
2, 268.395215604
3, 264.570040249
4, 234.323507488
6, 244.099933952
7, 223.133569068
9, 205.13592892
10, 226.988621372
11, 290.843308372
---- TYPES
int,double
====
---- QUERY
# Tests functioning of CorrRemoveState() for regr_r2()
select s_store_sk, regr_r2(s_number_employees, s_floor_space) over (partition by s_city order by s_store_sk
rows between 5 preceding and 2 following) from tpcds.store;
---- RESULTS
5, 0.107747035307
8, 0.107747035307
12, 0.107747035307
1, 0.261613627698
2, 0.776635209834
3, 0.728906876949
4, 0.00680634729367
6, 0.000795425636424
7, 0.0499353300472
9, 0.0868111803219
10, 0.0132688514648
11, 0.00476393800297
---- TYPES
int,double
====
---- QUERY
# Mathematical operations on double can lead to variance becoming negative by a very small amount (around +1e-13),
# to avoid that a check is added (state->xvar < 0.0 || state->yvar <= 0.0), without which the below test will
# result in nan for certain cases.
# Testcase when dependednt variable becomes negative:
select s_store_sk, regr_r2(s_number_employees, s_floor_space) over (partition by s_city order by s_store_sk
rows between 1 preceding and 1 following) from tpcds.store;
---- RESULTS
5, 1.0
8, 0.107747035307
12, 1.0
1, 1.0
2, 0.261613627698
3, 0.687656618674
4, 0.782782611877
6, 0.541316438509
7, 0.0272991489992
9, 0.942195083603
10, 1.0
11,NULL
---- TYPES
int,double
====
---- QUERY
# regr_slope() when window size is 2
select s_store_sk, regr_slope(s_number_employees, s_floor_space) over (partition by s_city order by s_store_sk
rows between 1 preceding and current row) from tpcds.store;
---- RESULTS
5,NULL
8,4.80120606296e-06
12,-9.00681309118e-06
1,NULL
2,-0.000255754475703
3,0.0
4,-1.00924694479e-05
6,-3.48934955352e-05
7,-0.000953195306915
9,1.32728364256e-05
10,1.00081893097e-05
11,NULL
---- TYPES
int,double
====
---- QUERY
# regr_intercept() when window size is 2
select s_store_sk, regr_intercept(s_number_employees, s_floor_space) over (partition by s_city order by s_store_sk
rows between 1 preceding and current row) from tpcds.store;
---- RESULTS
5,NULL
8, 244.41078638954104
12, 341.01161935181347
1,NULL
2, 1587.9053708439897
3, 236.0
4, 312.2784702956196
6, 543.9564370568922
7, 8832.752449571763
9, 178.14330273093714
10, 200.98275763037407
11,NULL
---- TYPES
int,double
====
---- QUERY
# regr_r2() when window size is 2
select s_store_sk, regr_r2(s_number_employees, s_floor_space) over (partition by s_city order by s_store_sk
rows between 1 preceding and current row) from tpcds.store;
---- RESULTS
5,NULL
8, 1.0
12, 1.0
1,NULL
2, 1.0
3, 1.0
4, 1.0
6, 1.0
7, 1.0
9, 1.0
10, 1.0
11,NULL
---- TYPES
int,double
====
---- QUERY
select s_store_sk, regr_slope(s_number_employees, s_floor_space) over (partition by s_city order by s_store_sk
rows between current row and 1 following) from tpcds.store;
---- RESULTS
5,4.80120606296e-06
8,-9.00681309118e-06
12,NULL
1,-0.000255754475703
2,0.0
3,-1.00924694479e-05
4,-3.48934955352e-05
6,-0.000953195306915
7,1.32728364256e-05
9,1.00081893097e-05
10,NULL
11,NULL
---- TYPES
int,double
====
---- QUERY
select s_store_sk, regr_intercept(s_number_employees, s_floor_space) over (partition by s_city order by s_store_sk
rows between current row and 1 following) from tpcds.store;
---- RESULTS
5, 244.41078639
8, 341.011619352
12,NULL
1, 1587.90537084399
2, 236.0
3, 312.278470296
4, 543.956437057
6, 8832.752449571763
7, 178.143302731
9, 200.98275763
10,NULL
11,NULL
---- TYPES
int,double
====
---- QUERY
select s_store_sk, regr_r2(s_number_employees, s_floor_space) over (partition by s_city order by s_store_sk
rows between current row and 1 following) from tpcds.store;
---- RESULTS
5, 1.0
8, 1.0
12,NULL
1, 1.0
2, 1.0
3, 1.0
4, 1.0
6, 1.0
7, 1.0
9, 1.0
10,NULL
11,NULL
---- TYPES
int,double
====
---- QUERY
select s_store_sk, regr_slope(s_number_employees, s_floor_space) over (partition by s_city order by s_store_sk
rows between current row and unbounded following) from tpcds.store;
---- RESULTS
12,NULL
8,-9.00681309118e-06
5,-1.37354292706e-06
11,NULL
10,NULL
9,1.00081893097e-05
7,1.05678876878e-05
6,5.25458148115e-06
4,-2.68509660433e-06
3,4.13595759719e-06
2,6.59936024162e-06
1,6.00483790103e-06
---- TYPES
int,double
====
---- QUERY
select s_store_sk, regr_intercept(s_number_employees, s_floor_space) over (partition by s_city order by s_store_sk
rows between current row and unbounded following) from tpcds.store;
---- RESULTS
12,NULL
8, 341.011619352
5, 296.416240104
11,NULL
10,NULL
9, 200.98275763
7, 197.748657023
6, 231.216488956
4, 290.843308372
3, 226.988621372
2, 205.13592892
1, 210.405303328
---- TYPES
int,double
====
---- QUERY
select s_store_sk, regr_r2(s_number_employees, s_floor_space) over (partition by s_city order by s_store_sk
rows between current row and unbounded following) from tpcds.store;
---- RESULTS
12,NULL
8, 1.0
5, 0.107747035307
11,NULL
10,NULL
9, 1.0
7, 0.932586749287
6, 0.0314560278457
4, 0.00476393800297
3, 0.0132688514648
2, 0.0868111803219
1, 0.1061202068
---- TYPES
int,double
====
---- QUERY
select s_store_sk, regr_slope(s_number_employees, s_floor_space) over (partition by s_city order by s_store_sk
rows between unbounded preceding and current row) from tpcds.store;
---- RESULTS
5,NULL
8,4.80120606296e-06
12,-1.37354292706e-06
1,NULL
2,-0.000255754475703
3,-2.01036006341e-06
4,-5.05103424244e-06
6,-4.3565531677e-06
7,1.21229193717e-06
9,4.44553019714e-07
10,3.9044206462e-06
11,6.00483790103e-06
---- TYPES
int,double
====
---- QUERY
select s_store_sk, regr_intercept(s_number_employees, s_floor_space) over (partition by s_city order by s_store_sk
rows between unbounded preceding and current row) from tpcds.store;
---- RESULTS
5,NULL
8, 244.41078639
12, 296.416240104
1,NULL
2, 1587.90537084399
3, 251.125599973
4, 268.395215604
6, 264.570040249
7, 234.323507488
9, 244.099933952
10, 223.133569068
11, 210.405303328
---- TYPES
int,double
====
---- QUERY
select s_store_sk, regr_r2(s_number_employees, s_floor_space) over (partition by s_city order by s_store_sk
rows between unbounded preceding and current row) from tpcds.store;
---- RESULTS
5,NULL
8, 1.0
12, 0.10774703530702502
1,NULL
2, 1.0
3, 0.26161362769774743
4, 0.7766352098335481
6, 0.7289068769493157
7, 0.006806347293667032
9, 7.954256364237451E-4
10, 0.049935330047174945
11, 0.10612020680026239
---- TYPES
int,double
====
---- QUERY
# Testcase when independent variable becomes negative:
select s_store_sk, regr_r2(s_floor_space, s_number_employees) over (partition by s_city order by
s_store_sk rows between 1 preceding and 1 following) from tpcds.store;
---- RESULTS
5, 1.0
8, 0.107747035307
12, 1.0
1, 1.0
2, 0.261613627698
3, 0.687656618674
4, 0.782782611877
6, 0.541316438509
7, 0.0272991489992
9, 0.942195083603
10, 1.0
11,NULL
---- TYPES
int, double
====
---- QUERY
# regr_count() function examples
select regr_count(ps_availqty, ps_supplycost) from tpch.partsupp;
---- RESULTS
800000
---- TYPES
bigint
====
---- QUERY
# Behavior of regr_count() on null table
select regr_count(d, e) from functional.nulltable;
---- RESULTS
0
---- TYPES
bigint
====
---- QUERY
# Behavior of regr_count() on empty table
select regr_count(f2, f2) from functional.emptytable;
---- RESULTS
0
---- TYPES
bigint
====
---- QUERY
# regr_count() on different datatypes
select regr_count(tinyint_col, tinyint_col), regr_count(smallint_col, smallint_col),
regr_count(int_col, int_col), regr_count(bigint_col, bigint_col), regr_count(float_col, float_col),
regr_count(double_col, double_col), regr_count(timestamp_col, timestamp_col) from functional.alltypes;
---- RESULTS
7300,7300,7300,7300,7300,7300,7300
---- TYPES
bigint,bigint,bigint,bigint,bigint,bigint,bigint
====
---- QUERY
# regr_count() on timestamp columns
select regr_count(utctime, localtime) from functional.alltimezones;
---- RESULTS
857
---- TYPES
bigint
====
---- QUERY
# Since group by id will result in a single row, this test shows that regr_count() returns 1 in case of a single row.
select id, regr_count(int_col, int_col) from functional.alltypestiny group by id;
---- RESULTS
2,1
4,1
0,1
6,1
1,1
7,1
3,1
5,1
---- TYPES
int,bigint
====
---- QUERY
# regr_count() on decimal datatype
select regr_count(d3, d4) from functional.decimal_tbl;
---- RESULTS
5
---- TYPES
bigint
====
---- QUERY
select year, regr_count(double_col, double_col) from functional.alltypes group by year;
---- RESULTS
2009,3650
2010,3650
---- TYPES
int,bigint
====
---- QUERY
select regr_count(double_col, -double_col) from functional.alltypes;
---- RESULTS
7300
---- TYPES
bigint
====
---- QUERY
select regr_count(double_col, double_col) from functional.alltypes;
---- RESULTS
7300
---- TYPES
bigint
====
---- QUERY
select regr_count(ss_sold_time_sk, ss_quantity) from tpcds.store_sales;
---- RESULTS
2685616
---- TYPES
bigint
====
---- QUERY
select s_store_sk, regr_count(s_number_employees, s_floor_space) over (partition by s_city order by s_store_sk)
from tpcds.store;
---- RESULTS
5,1
8,2
12,3
1,1
2,2
3,3
4,4
6,5
7,6
9,7
10,8
11,9
---- TYPES
int,bigint
====
---- QUERY
select id, double_col, regr_count(double_col, int_col) over (partition by month order by id) from functional.alltypes
order by id limit 10;
---- RESULTS
0,0.0,1
1,10.1,2
2,20.2,3
3,30.3,4
4,40.4,5
5,50.5,6
6,60.6,7
7,70.7,8
8,80.8,9
9,90.9,10
---- TYPES
int,double,bigint
====
---- QUERY
# regr_count() when one column is filled with null
select regr_count(null_int, rand()), regr_count(rand(), null_int) from functional.nullrows;
---- RESULTS
0,0
---- TYPES
bigint,bigint
====
---- QUERY
# regr_count() supporting join
select regr_count(A.double_col, B.double_col) from functional.alltypes A, functional.alltypes B where A.id=B.id;
---- RESULTS
7300
---- TYPES
bigint
====
---- QUERY
# Tests functioning of regrCountRemove()
select s_store_sk, regr_count(s_number_employees, s_floor_space) over (partition by s_city order by s_store_sk
rows between 5 preceding and 2 following) from tpcds.store;
---- RESULTS
5,3
8,3
12,3
1,3
2,4
3,5
4,6
6,7
7,8
9,8
10,7
11,6
---- TYPES
int,bigint
====
---- QUERY
# regr_count() when window size is 2
select s_store_sk, regr_count(s_number_employees, s_floor_space) over (partition by s_city order by s_store_sk
rows between 1 preceding and current row) from tpcds.store;
---- RESULTS
5,1
8,2
12,2
1,1
2,2
3,2
4,2
6,2
7,2
9,2
10,2
11,2
---- TYPES
int,bigint
====
---- QUERY
select s_store_sk, regr_count(s_number_employees, s_floor_space) over (partition by s_city order by s_store_sk
rows between current row and 1 following) from tpcds.store;
---- RESULTS
5,2
8,2
12,1
1,2
2,2
3,2
4,2
6,2
7,2
9,2
10,2
11,1
---- TYPES
int,bigint
====
---- QUERY
select s_store_sk, regr_count(s_number_employees, s_floor_space) over (partition by s_city order by s_store_sk
rows between current row and unbounded following) from tpcds.store;
---- RESULTS
12,1
8,2
5,3
11,1
10,2
9,3
7,4
6,5
4,6
3,7
2,8
1,9
---- TYPES
int,bigint
====
---- QUERY
select s_store_sk, regr_count(s_number_employees, s_floor_space) over (partition by s_city order by s_store_sk
rows between unbounded preceding and current row) from tpcds.store;
---- RESULTS
5,1
8,2
12,3
1,1
2,2
3,3
4,4
6,5
7,6
9,7
10,8
11,9
---- TYPES
int,bigint
====
---- QUERY
# Regression test for IMPALA-13873: the conjunct 'b = min_b' in the aggregation node
# should not be discarded.
WITH u1 AS (select 10 a, 10 b),
t AS (select a, b, min(b) over (partition by a) min_b from u1 UNION select 10, 10, 20)
select t.* from t where t.b = t.min_b;
---- RESULTS
10,10,10
---- TYPES
TINYINT,TINYINT,TINYINT
====
---- QUERY
# Regression test for IMPALA-13873: the conjunct 'b = min_b' in the aggregation node
# should not be discarded.
# Querying from tables, not inline expressions.
WITH t AS (
select d2 a, d3 b, min(d3) over (partition by d2) min_b from functional.decimal_tbl
UNION
select smallint_col, int_col, bigint_col from functional.alltypestiny
)
select t.* from t where t.b = t.min_b;
---- RESULTS
2222,1.2345678900,1.2345678900
0,0.0000000000,0.0000000000
333,123.4567890000,123.4567890000
111,12.3456789000,12.3456789000
---- TYPES
decimal,decimal,decimal
====
---- QUERY
# Regression test for IMPALA-13873: the conjunct 'b = min_b' in the aggregation node
# should not be discarded.
# The predicate cannot be pushed down to any UNION operand.
WITH u1 AS (select tinyint_col, id from functional.alltypestiny),
t AS (
select tinyint_col, id, min(id) over (partition by tinyint_col) min_id from u1
UNION
select tinyint_col, id, id+100 as min_id from u1)
select t.* from t where t.id = t.min_id;
---- RESULTS
1,1,1
0,0,0
---- TYPES
tinyint,int,bigint
====