| ==== |
| ---- QUERY |
| # count(distinct) |
| select count(distinct smallint_col, tinyint_col) |
| from alltypesagg |
| ---- RESULTS |
| 90 |
| ---- TYPES |
| bigint |
| ==== |
| ---- QUERY |
| select count(distinct bool_col) |
| from alltypesagg |
| ---- RESULTS |
| 2 |
| ---- TYPES |
| bigint |
| ==== |
| ---- QUERY |
| select count(distinct int_col) |
| from alltypesagg |
| ---- RESULTS |
| 999 |
| ---- TYPES |
| bigint |
| ==== |
| ---- QUERY |
| select count(distinct float_col) |
| from alltypesagg |
| ---- RESULTS |
| 999 |
| ---- TYPES |
| bigint |
| ==== |
| ---- QUERY |
| select count(distinct double_col) |
| from alltypesagg |
| ---- RESULTS |
| 999 |
| ---- TYPES |
| bigint |
| ==== |
| ---- QUERY |
| select count(distinct string_col) |
| from alltypesagg |
| ---- RESULTS |
| 1000 |
| ---- TYPES |
| bigint |
| ==== |
| ---- QUERY |
| select count(distinct date_string_col) |
| from alltypesagg |
| ---- RESULTS |
| 10 |
| ---- TYPES |
| bigint |
| ==== |
| ---- QUERY |
| select count(distinct timestamp_col) |
| from alltypesagg |
| ---- RESULTS |
| 10000 |
| ---- TYPES |
| bigint |
| ==== |
| ---- QUERY |
| # count(distinct) w/ grouping and non-distinct count() |
| select tinyint_col, count(distinct smallint_col), count(smallint_col) |
| from alltypesagg group by 1 |
| ---- RESULTS |
| 2,10,1000 |
| 3,10,1000 |
| NULL,9,1800 |
| 4,10,1000 |
| 8,10,1000 |
| 5,10,1000 |
| 9,10,1000 |
| 7,10,1000 |
| 1,10,1000 |
| 6,10,1000 |
| ---- TYPES |
| tinyint, bigint, bigint |
| ==== |
| ---- QUERY |
| # count(distinct) w/ grouping and non-distinct count() |
| select tinyint_col, count(distinct int_col, smallint_col), count(smallint_col) |
| from alltypesagg group by 1 |
| ---- RESULTS |
| 2,100,1000 |
| 3,100,1000 |
| NULL,90,1800 |
| 4,100,1000 |
| 8,100,1000 |
| 5,100,1000 |
| 9,100,1000 |
| 7,100,1000 |
| 1,100,1000 |
| 6,100,1000 |
| ---- TYPES |
| tinyint, bigint, bigint |
| ==== |
| ---- QUERY |
| # count(distinct) and sum(distinct) w/ grouping and non-distinct count() |
| select tinyint_col, count(distinct smallint_col), sum(distinct smallint_col), |
| count(smallint_col) |
| from alltypesagg group by 1 |
| ---- RESULTS |
| 2,10,470,1000 |
| 3,10,480,1000 |
| NULL,9,450,1800 |
| 4,10,490,1000 |
| 8,10,530,1000 |
| 5,10,500,1000 |
| 9,10,540,1000 |
| 7,10,520,1000 |
| 1,10,460,1000 |
| 6,10,510,1000 |
| ---- TYPES |
| tinyint, bigint, bigint, bigint |
| ==== |
| ---- QUERY |
| # count(distinct) and sum(distinct) w/ grouping; distinct in min() and max() |
| # ignored |
| select tinyint_col, count(distinct smallint_col), sum(distinct smallint_col), |
| count(smallint_col), min(distinct int_col), max(distinct float_col) |
| from alltypesagg group by 1 |
| ---- RESULTS |
| 2,10,470,1000,2,1091.199951171875 |
| 3,10,480,1000,3,1092.300048828125 |
| NULL,9,450,1800,10,1089 |
| 4,10,490,1000,4,1093.400024414062 |
| 8,10,530,1000,8,1097.800048828125 |
| 5,10,500,1000,5,1094.5 |
| 9,10,540,1000,9,1098.900024414062 |
| 7,10,520,1000,7,1096.699951171875 |
| 1,10,460,1000,1,1090.099975585938 |
| 6,10,510,1000,6,1095.599975585938 |
| ---- TYPES |
| tinyint, bigint, bigint, bigint, int, float |
| ==== |
| ---- QUERY |
| # count distinct order by the same agg expr |
| select count(distinct id) as count_id from alltypessmall order by count_id limit 100 |
| ---- RESULTS |
| 100 |
| ---- TYPES |
| bigint |
| ==== |
| ---- QUERY |
| # count distinct order by a diff agg expr |
| select count(distinct id) as sum_id from alltypessmall order by max(distinct id) |
| limit 100 |
| ---- RESULTS |
| 100 |
| ---- TYPES |
| bigint |
| ==== |
| ---- QUERY |
| # distinct * |
| select distinct * |
| from alltypesagg |
| where id < 20 |
| ---- RESULTS |
| 6,true,6,6,6,60,6.599999904632568,60.59999999999999,'01/01/10','6',2010-01-01 00:06:00.150000000,2010,1,1 |
| 14,true,4,14,14,140,15.39999961853027,141.4,'01/01/10','14',2010-01-01 00:14:00.910000000,2010,1,1 |
| 18,true,8,18,18,180,19.79999923706055,181.8,'01/01/10','18',2010-01-01 00:18:01.530000000,2010,1,1 |
| 19,false,9,19,19,190,20.89999961853027,191.9,'01/01/10','19',2010-01-01 00:19:01.710000000,2010,1,1 |
| 1,false,1,1,1,10,1.100000023841858,10.1,'01/01/10','1',2010-01-01 00:01:00,2010,1,1 |
| 2,true,2,2,2,20,2.200000047683716,20.2,'01/01/10','2',2010-01-01 00:02:00.100000000,2010,1,1 |
| 0,true,NULL,NULL,NULL,NULL,NULL,NULL,'01/01/10','0',2010-01-01 00:00:00,2010,1,1 |
| 7,false,7,7,7,70,7.699999809265137,70.7,'01/01/10','7',2010-01-01 00:07:00.210000000,2010,1,1 |
| 9,false,9,9,9,90,9.899999618530273,90.89999999999999,'01/01/10','9',2010-01-01 00:09:00.360000000,2010,1,1 |
| 11,false,1,11,11,110,12.10000038146973,111.1,'01/01/10','11',2010-01-01 00:11:00.550000000,2010,1,1 |
| 3,false,3,3,3,30,3.299999952316284,30.3,'01/01/10','3',2010-01-01 00:03:00.300000000,2010,1,1 |
| 15,false,5,15,15,150,16.5,151.5,'01/01/10','15',2010-01-01 00:15:01.500000000,2010,1,1 |
| 10,true,NULL,10,10,100,11,101,'01/01/10','10',2010-01-01 00:10:00.450000000,2010,1,1 |
| 17,false,7,17,17,170,18.70000076293945,171.7,'01/01/10','17',2010-01-01 00:17:01.360000000,2010,1,1 |
| 5,false,5,5,5,50,5.5,50.5,'01/01/10','5',2010-01-01 00:05:00.100000000,2010,1,1 |
| 10,true,NULL,10,10,100,11,101,'01/01/10','10',2010-01-01 00:10:00.450000000,2010,1,NULL |
| 8,true,8,8,8,80,8.800000190734863,80.8,'01/01/10','8',2010-01-01 00:08:00.280000000,2010,1,1 |
| 13,false,3,13,13,130,14.30000019073486,131.3,'01/01/10','13',2010-01-01 00:13:00.780000000,2010,1,1 |
| 12,true,2,12,12,120,13.19999980926514,121.2,'01/01/10','12',2010-01-01 00:12:00.660000000,2010,1,1 |
| 16,true,6,16,16,160,17.60000038146973,161.6,'01/01/10','16',2010-01-01 00:16:01.200000000,2010,1,1 |
| 4,true,4,4,4,40,4.400000095367432,40.4,'01/01/10','4',2010-01-01 00:04:00.600000000,2010,1,1 |
| 0,true,NULL,NULL,NULL,NULL,NULL,NULL,'01/01/10','0',2010-01-01 00:00:00,2010,1,NULL |
| ---- TYPES |
| int, boolean, tinyint, smallint, int, bigint, float, double, string, string, timestamp, int, int, int |
| ==== |
| ---- QUERY |
| # distinct w/ explicit select list |
| select distinct bool_col, tinyint_col |
| from alltypesagg |
| ---- RESULTS |
| false,1 |
| false,3 |
| false,5 |
| false,7 |
| false,9 |
| true,2 |
| true,4 |
| true,6 |
| true,8 |
| true,NULL |
| ---- TYPES |
| boolean, tinyint |
| ==== |
| ---- QUERY |
| # test count distinct with nulls w/o groupby |
| select count(distinct NULL) from alltypesagg |
| ---- RESULTS |
| 0 |
| ---- TYPES |
| bigint |
| ==== |
| ---- QUERY |
| # test count distinct with nulls w/o groupby |
| select count(distinct int_col, NULL) from alltypesagg |
| ---- RESULTS |
| 0 |
| ---- TYPES |
| bigint |
| ==== |
| ---- QUERY |
| # test count distinct with nulls and groupby |
| select tinyint_col, count(distinct NULL) from alltypesagg group by tinyint_col |
| order by tinyint_col limit 10 |
| ---- RESULTS |
| 1,0 |
| 2,0 |
| 3,0 |
| 4,0 |
| 5,0 |
| 6,0 |
| 7,0 |
| 8,0 |
| 9,0 |
| NULL,0 |
| ---- TYPES |
| tinyint, bigint |
| ==== |
| ---- QUERY |
| select tinyint_col, count(distinct int_col, NULL) from alltypesagg group by tinyint_col |
| order by tinyint_col limit 10 |
| ---- RESULTS |
| 1,0 |
| 2,0 |
| 3,0 |
| 4,0 |
| 5,0 |
| 6,0 |
| 7,0 |
| 8,0 |
| 9,0 |
| NULL,0 |
| ---- TYPES |
| tinyint, bigint |
| ==== |
| ---- QUERY |
| # make sure we still return the NULL group even though NULLs are not added to COUNT |
| select tinyint_col, count(distinct tinyint_col, NULL) from alltypesagg group by tinyint_col |
| order by tinyint_col limit 10 |
| ---- RESULTS |
| 1,0 |
| 2,0 |
| 3,0 |
| 4,0 |
| 5,0 |
| 6,0 |
| 7,0 |
| 8,0 |
| 9,0 |
| NULL,0 |
| ---- TYPES |
| tinyint, bigint |
| ==== |
| ---- QUERY |
| # test join on inline views containing distinct aggregates to make sure |
| # the aggregation info reports the correct tuple ids (from the 2nd phase |
| # distinct aggregation) for the inline-view expression substitution |
| select t1.c, t2.c from |
| (select count(distinct int_col) as c from functional.alltypestiny) t1 inner join |
| (select count(distinct bigint_col) as c from functional.alltypestiny) t2 on (t1.c = t2.c) |
| ---- RESULTS |
| 2,2 |
| ---- TYPES |
| bigint, bigint |
| ==== |
| ---- QUERY |
| # Test distinct + non-distinct aggregates without group by and having clause (IMPALA-845) |
| # alltypessmall has 100 non-null values for bigint_col, expect a single result |
| select count(distinct tinyint_col) from functional.alltypessmall |
| having count(bigint_col) > 99 |
| ---- RESULTS |
| 10 |
| ---- TYPES |
| bigint |
| ==== |
| ---- QUERY |
| # Test distinct + non-distinct aggregates without group by and having clause (IMPALA-845) |
| # alltypessmall has 100 non-null values for bigint_col, expect an empty result |
| select count(distinct tinyint_col) from functional.alltypessmall |
| having count(bigint_col) > 100 |
| ---- RESULTS |
| ---- TYPES |
| bigint |
| ==== |
| ---- QUERY |
| # Regression test for COUNT(ALL ) with no group-by and other distinct agg. IMPALA-862 |
| select count(*), COUNT(distinct 1) from alltypesagg where false |
| ---- RESULTS |
| 0,0 |
| ---- TYPES |
| bigint, bigint |
| ==== |
| ---- QUERY |
| # Regression test for COUNT(ALL ) with no group-by and other distinct agg. IMPALA-862 |
| select count(tinyint_col), sum(distinct int_col) from alltypesagg |
| ---- RESULTS |
| 9000,499500 |
| ---- TYPES |
| bigint, bigint |
| ==== |
| ---- QUERY |
| # Regression test for COUNT(ALL ) with no group-by and other distinct agg. IMPALA-862 |
| select count(*), COUNT(distinct 1) from alltypesagg |
| ---- RESULTS |
| 11000,1 |
| ---- TYPES |
| bigint, bigint |
| ==== |
| ---- QUERY |
| # Test distinct + non-distinct aggregation with intermediate tuples (IMPALA-1247). |
| select avg(int_col), count(int_col), avg(distinct bigint_col) |
| from functional.alltypes |
| ---- RESULTS |
| 4.5,7300,45 |
| ---- TYPES |
| double, bigint, double |
| ==== |
| ---- QUERY |
| # Test distinct + non-distinct aggregation with intermediate tuples (IMPALA-1247). |
| select bool_col, avg(int_col), count(int_col), avg(distinct bigint_col) |
| from functional.alltypes |
| group by bool_col |
| order by bool_col |
| ---- RESULTS |
| false,5,3650,50 |
| true,4,3650,40 |
| ---- TYPES |
| boolean, double, bigint, double |
| ==== |
| ---- QUERY |
| # Test rewriting count(distinct) into NDV() via a query option. |
| set appx_count_distinct=true; |
| select count(distinct int_col), count(distinct float_col), count(distinct string_col) |
| from alltypesagg |
| ---- RESULTS |
| 957,1016,963 |
| ---- TYPES |
| bigint, bigint, bigint |
| ==== |
| ---- QUERY |
| # Large (more than 1 regular batches) distinct with multiple group by keys. |
| SELECT COUNT(*) FROM |
| (SELECT COUNT(DISTINCT p_partkey) |
| FROM tpch_parquet.part GROUP BY p_brand, p_type, p_size) v |
| ---- RESULTS |
| 123039 |
| ---- TYPES |
| bigint |
| ==== |