blob: 822a3c445eba98dc4e68f305dfb1e3955829a510 [file] [log] [blame]
# agg.iq - Aggregate functions
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to you under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
!use post
!set outputformat mysql
# count(*) returns number of rows in table
select count(ename) as c from emp;
+---+
| C |
+---+
| 9 |
+---+
(1 row)
!ok
# count of not-nullable column same as count(*)
select count(ename) as c from emp;
+---+
| C |
+---+
| 9 |
+---+
(1 row)
!ok
# count of nullable column
select count(deptno) as c from emp;
+---+
| C |
+---+
| 8 |
+---+
(1 row)
!ok
# composite count
select count(deptno, ename, 1, deptno) as c from emp;
+---+
| C |
+---+
| 8 |
+---+
(1 row)
!ok
# DISTINCT and GROUP BY
select distinct deptno, count(*) as c from emp group by deptno;
+--------+---+
| DEPTNO | C |
+--------+---+
| 10 | 2 |
| 20 | 1 |
| 30 | 2 |
| 50 | 2 |
| 60 | 1 |
| | 1 |
+--------+---+
(6 rows)
!ok
select distinct deptno from emp group by deptno;
+--------+
| DEPTNO |
+--------+
| 10 |
| 20 |
| 30 |
| 50 |
| 60 |
| |
+--------+
(6 rows)
!ok
select distinct count(*) as c from emp group by deptno;
+---+
| C |
+---+
| 1 |
| 2 |
+---+
(2 rows)
!ok
select distinct count(*) as c from emp group by deptno having count(*) > 1;
+---+
| C |
+---+
| 2 |
+---+
(1 row)
!ok
select distinct count(*) as c from emp group by deptno order by deptno desc;
Expression 'DEPTNO' is not in the select clause
!error
select distinct count(*) as c from emp group by deptno order by 1 desc;
+---+
| C |
+---+
| 2 |
| 1 |
+---+
(2 rows)
!ok
# [CALCITE-2192] RelBuilder wrongly skips creation of Aggregate that prunes
# columns if input is unique
select distinct deptno
from (select deptno, count(*) from emp group by deptno);
+--------+
| DEPTNO |
+--------+
| 10 |
| 20 |
| 30 |
| 50 |
| 60 |
| |
+--------+
(6 rows)
!ok
# [CALCITE-998] Exception when calling STDDEV_SAMP, STDDEV_POP
# stddev_samp
select stddev_samp(deptno) as s from emp;
+----+
| S |
+----+
| 19 |
+----+
(1 row)
!ok
# [CALCITE-3815] Add missing SQL standard aggregate
# functions: EVERY, SOME, INTERSECTION
select some(deptno = 100), every(deptno > 0), intersection(multiset[1, 2]) from emp;
+--------+--------+--------+
| EXPR$0 | EXPR$1 | EXPR$2 |
+--------+--------+--------+
| false | true | [1, 2] |
+--------+--------+--------+
(1 row)
!ok
select some(deptno > 100), every(deptno > 0) from emp where deptno > 1000;
+--------+--------+
| EXPR$0 | EXPR$1 |
+--------+--------+
| | |
+--------+--------+
(1 row)
!ok
# stddev_pop
select stddev_pop(deptno) as s from emp;
+----+
| S |
+----+
| 17 |
+----+
(1 row)
!ok
# stddev
select stddev(deptno) as s from emp;
+----+
| S |
+----+
| 19 |
+----+
(1 row)
!ok
# both
select gender,
stddev_pop(deptno) as p,
stddev_samp(deptno) as s,
stddev(deptno) as ss,
count(deptno) as c
from emp
group by gender;
+--------+----+----+----+---+
| GENDER | P | S | SS | C |
+--------+----+----+----+---+
| F | 17 | 19 | 19 | 5 |
| M | 17 | 20 | 20 | 3 |
+--------+----+----+----+---+
(2 rows)
!ok
select city, gender as c from emps;
+---------------+---+
| CITY | C |
+---------------+---+
| Vancouver | F |
| San Francisco | M |
| | |
| Vancouver | M |
| | F |
+---------------+---+
(5 rows)
!ok
# SELECT DISTINCT includes fully and partially null rows
select distinct city, gender from emps;
+---------------+--------+
| CITY | GENDER |
+---------------+--------+
| | |
| Vancouver | M |
| | F |
| San Francisco | M |
| Vancouver | F |
+---------------+--------+
(5 rows)
!ok
# COUNT excludes fully or partially null rows
select count(city, gender) as c from emps;
+---+
| C |
+---+
| 3 |
+---+
(1 row)
!ok
# COUNT-DISTINCT excludes fully or partially null rows
select count(distinct city, gender) as c from emps;
+---+
| C |
+---+
| 3 |
+---+
(1 row)
!ok
select distinct mod(deptno, 20) as m, gender as c from emps;
+----+---+
| M | C |
+----+---+
| 0 | F |
| 10 | |
| 0 | M |
+----+---+
(3 rows)
!ok
# Partially null row (10, NULL) is excluded from count.
select count(distinct mod(deptno, 20), gender) as c from emps;
+---+
| C |
+---+
| 2 |
+---+
(1 row)
!ok
select count(mod(deptno, 20), gender) as c from emps;
+---+
| C |
+---+
| 4 |
+---+
(1 row)
!ok
# Nulls in GROUP BY
select x = 1 as x1, count(*) as c
from (values 0, 1, 2, cast(null as integer)) as t(x)
group by x = 1;
X1 BOOLEAN(1)
C BIGINT(19) NOT NULL
!type
+-------+---+
| X1 | C |
+-------+---+
| false | 2 |
| true | 1 |
| | 1 |
+-------+---+
(3 rows)
!ok
# Basic GROUPING SETS
select deptno, count(*) as c from emps group by grouping sets ((), (deptno));
+--------+---+
| DEPTNO | C |
+--------+---+
| 10 | 1 |
| 20 | 2 |
| 40 | 2 |
| | 5 |
+--------+---+
(4 rows)
!ok
# GROUPING SETS on expression
select deptno + 1, count(*) as c from emps group by grouping sets ((), (deptno + 1));
+--------+---+
| EXPR$0 | C |
+--------+---+
| 11 | 1 |
| 21 | 2 |
| 41 | 2 |
| | 5 |
+--------+---+
(4 rows)
!ok
# GROUPING SETS on single-row relation returns multiple rows
select 1 as c
from (values ('a', 'b')) as t (a, b)
group by grouping sets ((a), (b), (b, a));
+---+
| C |
+---+
| 1 |
| 1 |
| 1 |
+---+
(3 rows)
!ok
# CUBE
select deptno + 1, count(*) as c from emp group by cube(deptno, gender);
+--------+---+
| EXPR$0 | C |
+--------+---+
| 11 | 1 |
| 11 | 1 |
| 11 | 2 |
| 21 | 1 |
| 21 | 1 |
| 31 | 2 |
| 31 | 2 |
| 51 | 1 |
| 51 | 1 |
| 51 | 2 |
| 61 | 1 |
| 61 | 1 |
| | 1 |
| | 1 |
| | 3 |
| | 6 |
| | 9 |
+--------+---+
(17 rows)
!ok
# ROLLUP on 1 column
select deptno + 1, count(*) as c
from emp
group by rollup(deptno);
+--------+---+
| EXPR$0 | C |
+--------+---+
| 11 | 2 |
| 21 | 1 |
| 31 | 2 |
| 51 | 2 |
| 61 | 1 |
| | 1 |
| | 9 |
+--------+---+
(7 rows)
!ok
# ROLLUP on 2 columns; project columns in different order
select gender, deptno + 1, count(*) as c
from emp
group by rollup(deptno, gender);
+--------+--------+---+
| GENDER | EXPR$1 | C |
+--------+--------+---+
| M | 21 | 1 |
| F | 11 | 1 |
| F | 31 | 2 |
| F | 51 | 1 |
| F | 61 | 1 |
| F | | 1 |
| M | 11 | 1 |
| M | 51 | 1 |
| | 11 | 2 |
| | 21 | 1 |
| | 31 | 2 |
| | 51 | 2 |
| | 61 | 1 |
| | | 1 |
| | | 9 |
+--------+--------+---+
(15 rows)
!ok
# ROLLUP on column with nulls
# Note the two rows with NULL key (one represents ALL)
select gender, count(*) as c
from emp
group by rollup(gender);
+--------+---+
| GENDER | C |
+--------+---+
| F | 6 |
| M | 3 |
| | 9 |
+--------+---+
(3 rows)
!ok
# ROLLUP plus ORDER BY
select gender, count(*) as c
from emp
group by rollup(gender)
order by c desc;
+--------+---+
| GENDER | C |
+--------+---+
| | 9 |
| F | 6 |
| M | 3 |
+--------+---+
(3 rows)
!ok
# ROLLUP cartesian product
select deptno, count(*) as c
from emp
group by rollup(deptno), rollup(gender);
+--------+---+
| DEPTNO | C |
+--------+---+
| 10 | 1 |
| 10 | 1 |
| 20 | 1 |
| 20 | 1 |
| | 1 |
| 10 | 2 |
| 30 | 2 |
| 30 | 2 |
| 50 | 1 |
| 50 | 1 |
| 50 | 2 |
| 60 | 1 |
| 60 | 1 |
| | 1 |
| | 3 |
| | 6 |
| | 9 |
+--------+---+
(17 rows)
!ok
# ROLLUP cartesian product of with tuple with expression
select deptno / 2 + 1 as half1, count(*) as c
from emp
group by rollup(deptno / 2, gender), rollup(substring(ename FROM 1 FOR 1));
+-------+---+
| HALF1 | C |
+-------+---+
| 11 | 1 |
| 11 | 1 |
| 11 | 1 |
| 11 | 1 |
| 16 | 1 |
| 16 | 1 |
| 16 | 1 |
| 16 | 1 |
| 16 | 2 |
| 16 | 2 |
| 26 | 1 |
| 26 | 1 |
| 26 | 1 |
| 26 | 1 |
| 26 | 1 |
| 26 | 1 |
| 26 | 2 |
| 31 | 1 |
| 31 | 1 |
| 31 | 1 |
| 31 | 1 |
| 6 | 1 |
| 6 | 1 |
| 6 | 1 |
| 6 | 1 |
| 6 | 1 |
| 6 | 1 |
| 6 | 2 |
| | 1 |
| | 1 |
| | 1 |
| | 1 |
| | 1 |
| | 1 |
| | 1 |
| | 1 |
| | 1 |
| | 2 |
| | 2 |
| | 9 |
+-------+---+
(40 rows)
!ok
# ROLLUP with HAVING
select deptno + 1 as d1, count(*) as c
from emp
group by rollup(deptno)
having count(*) > 3;
+----+---+
| D1 | C |
+----+---+
| | 9 |
+----+---+
(1 row)
!ok
# ROLLUP column used in expression; see [CALCITE-5296]
# In a query with ROLLUP, validator wrongly infers that a column is NOT NULL
select deptno, deptno + 1 as d1 from emp group by rollup(deptno);
+--------+----+
| DEPTNO | D1 |
+--------+----+
| 10 | 11 |
| 20 | 21 |
| 30 | 31 |
| 50 | 51 |
| 60 | 61 |
| | |
| | |
+--------+----+
(7 rows)
!ok
# CUBE and DISTINCT
select distinct count(*) from emp group by cube(deptno, gender);
+--------+
| EXPR$0 |
+--------+
| 1 |
| 2 |
| 3 |
| 6 |
| 9 |
+--------+
(5 rows)
!ok
# CUBE and ROLLUP cartesian product over same columns
select deptno, gender, count(*) from emp where deptno = 20 group by cube(deptno, gender), rollup(deptno, gender);
+--------+--------+--------+
| DEPTNO | GENDER | EXPR$2 |
+--------+--------+--------+
| 20 | M | 1 |
| 20 | M | 1 |
| 20 | M | 1 |
| 20 | M | 1 |
| 20 | M | 1 |
| 20 | M | 1 |
| 20 | M | 1 |
| 20 | | 1 |
| 20 | | 1 |
| 20 | | 1 |
| | M | 1 |
| | | 1 |
+--------+--------+--------+
(12 rows)
!ok
# GROUP BY DISTINCT CUBE and ROLLUP cartesian product over same columns
select deptno, gender, count(*) from emp where deptno = 20 group by distinct cube(deptno, gender), rollup(deptno, gender);
+--------+--------+--------+
| DEPTNO | GENDER | EXPR$2 |
+--------+--------+--------+
| 20 | M | 1 |
| 20 | | 1 |
| | M | 1 |
| | | 1 |
+--------+--------+--------+
(4 rows)
!ok
# GROUP BY over empty columns
select count(*) from emp where deptno = 20 group by ();
+--------+
| EXPR$0 |
+--------+
| 1 |
+--------+
(1 row)
!ok
# GROUP BY DISTINCT over empty columns
select count(*) from emp where deptno = 20 group by distinct ();
+--------+
| EXPR$0 |
+--------+
| 1 |
+--------+
(1 row)
!ok
# GROUP BY DISTINCT x + y
select deptno + 1, count(*) from emp where deptno = 20 group by distinct deptno + 1;
+--------+--------+
| EXPR$0 | EXPR$1 |
+--------+--------+
| 21 | 1 |
+--------+--------+
(1 row)
!ok
# CUBE and JOIN
select e.deptno, e.gender, min(e.ename) as min_name
from emp as e join dept as d using (deptno)
group by cube(e.deptno, d.deptno, e.gender)
having count(*) > 2 or gender = 'M' and e.deptno = 10;
+--------+--------+----------+
| DEPTNO | GENDER | MIN_NAME |
+--------+--------+----------+
| 10 | M | Bob |
| 10 | M | Bob |
| | F | Alice |
| | | Alice |
+--------+--------+----------+
(4 rows)
!ok
# GROUPING in SELECT clause of GROUP BY query
select count(*) as c, grouping(deptno) as g
from emp
group by deptno;
+---+---+
| C | G |
+---+---+
| 1 | 0 |
| 1 | 0 |
| 1 | 0 |
| 2 | 0 |
| 2 | 0 |
| 2 | 0 |
+---+---+
(6 rows)
!ok
!use scott
# GROUPING in SELECT clause of CUBE query
select deptno, job, count(*) as c, grouping(deptno) as d,
grouping(job) j, grouping(deptno, job) as x
from "scott".emp
group by cube(deptno, job);
+--------+-----------+----+---+---+---+
| DEPTNO | JOB | C | D | J | X |
+--------+-----------+----+---+---+---+
| 10 | CLERK | 1 | 0 | 0 | 0 |
| 10 | MANAGER | 1 | 0 | 0 | 0 |
| 10 | PRESIDENT | 1 | 0 | 0 | 0 |
| 10 | | 3 | 0 | 1 | 1 |
| 20 | ANALYST | 2 | 0 | 0 | 0 |
| 20 | CLERK | 2 | 0 | 0 | 0 |
| 20 | MANAGER | 1 | 0 | 0 | 0 |
| 20 | | 5 | 0 | 1 | 1 |
| 30 | CLERK | 1 | 0 | 0 | 0 |
| 30 | MANAGER | 1 | 0 | 0 | 0 |
| 30 | SALESMAN | 4 | 0 | 0 | 0 |
| 30 | | 6 | 0 | 1 | 1 |
| | ANALYST | 2 | 1 | 0 | 2 |
| | CLERK | 4 | 1 | 0 | 2 |
| | MANAGER | 3 | 1 | 0 | 2 |
| | PRESIDENT | 1 | 1 | 0 | 2 |
| | SALESMAN | 4 | 1 | 0 | 2 |
| | | 14 | 1 | 1 | 3 |
+--------+-----------+----+---+---+---+
(18 rows)
!ok
!use post
# GROUPING, GROUP_ID, GROUPING_ID in SELECT clause of GROUP BY query
select count(*) as c,
grouping(deptno) as g,
group_id() as gid,
grouping_id(deptno) as gd,
grouping_id(gender) as gg,
grouping_id(gender, deptno) as ggd,
grouping_id(deptno, gender) as gdg
from emp
group by rollup(deptno, gender);
+---+---+-----+----+----+-----+-----+
| C | G | GID | GD | GG | GGD | GDG |
+---+---+-----+----+----+-----+-----+
| 1 | 0 | 0 | 0 | 0 | 0 | 0 |
| 1 | 0 | 0 | 0 | 0 | 0 | 0 |
| 1 | 0 | 0 | 0 | 0 | 0 | 0 |
| 1 | 0 | 0 | 0 | 0 | 0 | 0 |
| 1 | 0 | 0 | 0 | 0 | 0 | 0 |
| 1 | 0 | 0 | 0 | 0 | 0 | 0 |
| 1 | 0 | 0 | 0 | 0 | 0 | 0 |
| 2 | 0 | 0 | 0 | 0 | 0 | 0 |
| 9 | 1 | 0 | 1 | 1 | 3 | 3 |
| 1 | 0 | 0 | 0 | 1 | 2 | 1 |
| 1 | 0 | 0 | 0 | 1 | 2 | 1 |
| 1 | 0 | 0 | 0 | 1 | 2 | 1 |
| 2 | 0 | 0 | 0 | 1 | 2 | 1 |
| 2 | 0 | 0 | 0 | 1 | 2 | 1 |
| 2 | 0 | 0 | 0 | 1 | 2 | 1 |
+---+---+-----+----+----+-----+-----+
(15 rows)
!ok
# GROUPING accepts multiple arguments, gives same result as GROUPING_ID
select count(*) as c,
grouping(deptno) as gd,
grouping_id(deptno) as gid,
grouping(deptno, gender, deptno) as gdgd,
grouping_id(deptno, gender, deptno) as gidgd
from emp
group by rollup(deptno, gender)
having grouping(deptno) <= grouping_id(deptno, gender, deptno);
+---+----+-----+------+-------+
| C | GD | GID | GDGD | GIDGD |
+---+----+-----+------+-------+
| 1 | 0 | 0 | 0 | 0 |
| 1 | 0 | 0 | 0 | 0 |
| 1 | 0 | 0 | 0 | 0 |
| 1 | 0 | 0 | 0 | 0 |
| 1 | 0 | 0 | 0 | 0 |
| 1 | 0 | 0 | 0 | 0 |
| 1 | 0 | 0 | 0 | 0 |
| 2 | 0 | 0 | 0 | 0 |
| 1 | 0 | 0 | 2 | 2 |
| 1 | 0 | 0 | 2 | 2 |
| 1 | 0 | 0 | 2 | 2 |
| 2 | 0 | 0 | 2 | 2 |
| 2 | 0 | 0 | 2 | 2 |
| 2 | 0 | 0 | 2 | 2 |
| 9 | 1 | 1 | 7 | 7 |
+---+----+-----+------+-------+
(15 rows)
!ok
# GROUPING in ORDER BY clause
select count(*) as c
from emp
group by rollup(deptno)
order by grouping(deptno), c;
+---+
| C |
+---+
| 1 |
| 1 |
| 1 |
| 2 |
| 2 |
| 2 |
| 9 |
+---+
(7 rows)
!ok
# Duplicate argument to GROUPING_ID.
select deptno, gender, grouping_id(deptno, gender, deptno), count(*) as c
from emp
where deptno = 10
group by rollup(gender, deptno);
+--------+--------+--------+---+
| DEPTNO | GENDER | EXPR$2 | C |
+--------+--------+--------+---+
| 10 | F | 0 | 1 |
| 10 | M | 0 | 1 |
| | F | 5 | 1 |
| | M | 5 | 1 |
| | | 7 | 2 |
+--------+--------+--------+---+
(5 rows)
!ok
# GROUPING in SELECT clause of ROLLUP query
select count(*) as c, deptno, grouping(deptno) as g
from emp
group by rollup(deptno);
+---+--------+---+
| C | DEPTNO | G |
+---+--------+---+
| 1 | 20 | 0 |
| 1 | 60 | 0 |
| 1 | | 0 |
| 2 | 10 | 0 |
| 2 | 30 | 0 |
| 2 | 50 | 0 |
| 9 | | 1 |
+---+--------+---+
(7 rows)
!ok
# GROUPING, GROUPING_ID and GROUP_ID
select deptno, gender, grouping(deptno) gd, grouping(gender) gg,
grouping_id(deptno, gender) dg, grouping_id(gender, deptno) gd,
group_id() gid, count(*) c
from emp
group by cube(deptno, gender);
+--------+--------+----+----+----+----+-----+---+
| DEPTNO | GENDER | GD | GG | DG | GD | GID | C |
+--------+--------+----+----+----+----+-----+---+
| 10 | F | 0 | 0 | 0 | 0 | 0 | 1 |
| 10 | M | 0 | 0 | 0 | 0 | 0 | 1 |
| 20 | M | 0 | 0 | 0 | 0 | 0 | 1 |
| 30 | F | 0 | 0 | 0 | 0 | 0 | 2 |
| 50 | F | 0 | 0 | 0 | 0 | 0 | 1 |
| 50 | M | 0 | 0 | 0 | 0 | 0 | 1 |
| 60 | F | 0 | 0 | 0 | 0 | 0 | 1 |
| | F | 0 | 0 | 0 | 0 | 0 | 1 |
| | | 1 | 1 | 3 | 3 | 0 | 9 |
| 10 | | 0 | 1 | 1 | 2 | 0 | 2 |
| 20 | | 0 | 1 | 1 | 2 | 0 | 1 |
| 30 | | 0 | 1 | 1 | 2 | 0 | 2 |
| 50 | | 0 | 1 | 1 | 2 | 0 | 2 |
| 60 | | 0 | 1 | 1 | 2 | 0 | 1 |
| | F | 1 | 0 | 2 | 1 | 0 | 6 |
| | M | 1 | 0 | 2 | 1 | 0 | 3 |
| | | 0 | 1 | 1 | 2 | 0 | 1 |
+--------+--------+----+----+----+----+-----+---+
(17 rows)
!ok
# [CALCITE-1781] Allow expression in CUBE and ROLLUP
select deptno + 1 as d1, deptno + 1 - 1 as d0, count(*) as c
from emp
group by rollup(deptno + 1);
+----+----+---+
| D1 | D0 | C |
+----+----+---+
| 11 | 10 | 2 |
| 21 | 20 | 1 |
| 31 | 30 | 2 |
| 51 | 50 | 2 |
| 61 | 60 | 1 |
| | | 1 |
| | | 9 |
+----+----+---+
(7 rows)
!ok
select mod(deptno, 20) as d, count(*) as c, gender as g
from emp
group by cube(mod(deptno, 20), gender);
+----+---+---+
| D | C | G |
+----+---+---+
| 0 | 1 | F |
| 0 | 1 | M |
| 0 | 2 | |
| 10 | 2 | M |
| 10 | 4 | F |
| 10 | 6 | |
| | 1 | F |
| | 1 | |
| | 3 | M |
| | 6 | F |
| | 9 | |
+----+---+---+
(11 rows)
!ok
select mod(deptno, 20) as d, count(*) as c, gender as g
from emp
group by rollup(mod(deptno, 20), gender);
+----+---+---+
| D | C | G |
+----+---+---+
| 0 | 1 | F |
| 0 | 1 | M |
| 0 | 2 | |
| 10 | 2 | M |
| 10 | 4 | F |
| 10 | 6 | |
| | 1 | F |
| | 1 | |
| | 9 | |
+----+---+---+
(9 rows)
!ok
select count(*) as c
from emp
group by cube(1);
+---+
| C |
+---+
| 9 |
| 9 |
+---+
(2 rows)
!ok
select count(*) as c
from emp
group by rollup(1);
+---+
| C |
+---+
| 9 |
| 9 |
+---+
(2 rows)
!ok
!use scott
select deptno, group_id() as g, count(*) as c
from "scott".emp
group by grouping sets (deptno, (), ());
+--------+---+----+
| DEPTNO | G | C |
+--------+---+----+
| 10 | 0 | 3 |
| 20 | 0 | 5 |
| 30 | 0 | 6 |
| | 0 | 14 |
| | 1 | 14 |
+--------+---+----+
(5 rows)
!ok
# Degenerate case: GROUP_ID() without GROUPING SETS
select group_id() as g
from "scott".emp
group by ();
+---+
| G |
+---+
| 0 |
+---+
(1 row)
!ok
# GROUP_ID() does not make a query into an aggregate query
# (maybe it should)
select group_id() as g
from "scott".emp;
GROUP_ID operator may only occur in an aggregate query
!error
# GROUP_ID() does not make a query into an aggregate query
select group_id() as g, sum(3) as s3
from "scott".emp;
+---+----+
| G | S3 |
+---+----+
| 0 | 42 |
+---+----+
(1 row)
!ok
# Extremely degenerate case: GROUP_ID on an empty table
select group_id() as g, sum(3) as s3
from "scott".emp
where empno < 0;
+---+----+
| G | S3 |
+---+----+
| 0 | |
+---+----+
(1 row)
!ok
# As above, explicit empty GROUP BY
select group_id() as g
from "scott".emp
where empno < 0
group by ();
+---+
| G |
+---+
| 0 |
+---+
(1 row)
!ok
# As above, non-empty GROUP BY
select group_id() as g
from "scott".emp
where empno < 0
group by deptno;
+---+
| G |
+---+
+---+
(0 rows)
!ok
# From http://rwijk.blogspot.com/2008/12/groupid.html
select deptno
, job
, empno
, ename
, sum(sal) sumsal
, case grouping_id(deptno,job,empno)
when 0 then 'grouped by deptno,job,empno,ename'
when 1 then 'grouped by deptno,job'
when 3 then 'grouped by deptno'
when 7 then 'grouped by ()'
end gr_text
from "scott".emp
group by rollup(deptno,job,(empno,ename))
order by deptno
, job
, empno;
+--------+-----------+-------+--------+----------+-----------------------------------+
| DEPTNO | JOB | EMPNO | ENAME | SUMSAL | GR_TEXT |
+--------+-----------+-------+--------+----------+-----------------------------------+
| 10 | CLERK | 7934 | MILLER | 1300.00 | grouped by deptno,job,empno,ename |
| 10 | CLERK | | | 1300.00 | grouped by deptno,job |
| 10 | MANAGER | 7782 | CLARK | 2450.00 | grouped by deptno,job,empno,ename |
| 10 | MANAGER | | | 2450.00 | grouped by deptno,job |
| 10 | PRESIDENT | 7839 | KING | 5000.00 | grouped by deptno,job,empno,ename |
| 10 | PRESIDENT | | | 5000.00 | grouped by deptno,job |
| 10 | | | | 8750.00 | grouped by deptno |
| 20 | ANALYST | 7788 | SCOTT | 3000.00 | grouped by deptno,job,empno,ename |
| 20 | ANALYST | 7902 | FORD | 3000.00 | grouped by deptno,job,empno,ename |
| 20 | ANALYST | | | 6000.00 | grouped by deptno,job |
| 20 | CLERK | 7369 | SMITH | 800.00 | grouped by deptno,job,empno,ename |
| 20 | CLERK | 7876 | ADAMS | 1100.00 | grouped by deptno,job,empno,ename |
| 20 | CLERK | | | 1900.00 | grouped by deptno,job |
| 20 | MANAGER | 7566 | JONES | 2975.00 | grouped by deptno,job,empno,ename |
| 20 | MANAGER | | | 2975.00 | grouped by deptno,job |
| 20 | | | | 10875.00 | grouped by deptno |
| 30 | CLERK | 7900 | JAMES | 950.00 | grouped by deptno,job,empno,ename |
| 30 | CLERK | | | 950.00 | grouped by deptno,job |
| 30 | MANAGER | 7698 | BLAKE | 2850.00 | grouped by deptno,job,empno,ename |
| 30 | MANAGER | | | 2850.00 | grouped by deptno,job |
| 30 | SALESMAN | 7499 | ALLEN | 1600.00 | grouped by deptno,job,empno,ename |
| 30 | SALESMAN | 7521 | WARD | 1250.00 | grouped by deptno,job,empno,ename |
| 30 | SALESMAN | 7654 | MARTIN | 1250.00 | grouped by deptno,job,empno,ename |
| 30 | SALESMAN | 7844 | TURNER | 1500.00 | grouped by deptno,job,empno,ename |
| 30 | SALESMAN | | | 5600.00 | grouped by deptno,job |
| 30 | | | | 9400.00 | grouped by deptno |
| | | | | 29025.00 | grouped by () |
+--------+-----------+-------+--------+----------+-----------------------------------+
(27 rows)
!ok
# From http://rwijk.blogspot.com/2008/12/groupid.html
select deptno
, job
, empno
, ename
, sum(sal) sumsal
, case grouping_id(deptno,job,empno)
when 0 then 'grouped by deptno,job,empno,ename'
when 1 then 'grouped by deptno,job'
when 3 then 'grouped by deptno, grouping set ' || cast(3+group_id() as varchar)
when 7 then 'grouped by (), grouping set ' || cast(5+group_id() as varchar)
end gr_text
from "scott".emp
group by grouping sets
( (deptno,job,empno,ename)
, (deptno,job)
, deptno
, deptno
, ()
, ()
)
order by deptno
, job
, empno;
+--------+-----------+-------+--------+----------+-----------------------------------+
| DEPTNO | JOB | EMPNO | ENAME | SUMSAL | GR_TEXT |
+--------+-----------+-------+--------+----------+-----------------------------------+
| 10 | CLERK | 7934 | MILLER | 1300.00 | grouped by deptno,job,empno,ename |
| 10 | CLERK | | | 1300.00 | grouped by deptno,job |
| 10 | MANAGER | 7782 | CLARK | 2450.00 | grouped by deptno,job,empno,ename |
| 10 | MANAGER | | | 2450.00 | grouped by deptno,job |
| 10 | PRESIDENT | 7839 | KING | 5000.00 | grouped by deptno,job,empno,ename |
| 10 | PRESIDENT | | | 5000.00 | grouped by deptno,job |
| 10 | | | | 8750.00 | grouped by deptno, grouping set 3 |
| 10 | | | | 8750.00 | grouped by deptno, grouping set 4 |
| 20 | ANALYST | 7788 | SCOTT | 3000.00 | grouped by deptno,job,empno,ename |
| 20 | ANALYST | 7902 | FORD | 3000.00 | grouped by deptno,job,empno,ename |
| 20 | ANALYST | | | 6000.00 | grouped by deptno,job |
| 20 | CLERK | 7369 | SMITH | 800.00 | grouped by deptno,job,empno,ename |
| 20 | CLERK | 7876 | ADAMS | 1100.00 | grouped by deptno,job,empno,ename |
| 20 | CLERK | | | 1900.00 | grouped by deptno,job |
| 20 | MANAGER | 7566 | JONES | 2975.00 | grouped by deptno,job,empno,ename |
| 20 | MANAGER | | | 2975.00 | grouped by deptno,job |
| 20 | | | | 10875.00 | grouped by deptno, grouping set 3 |
| 20 | | | | 10875.00 | grouped by deptno, grouping set 4 |
| 30 | CLERK | 7900 | JAMES | 950.00 | grouped by deptno,job,empno,ename |
| 30 | CLERK | | | 950.00 | grouped by deptno,job |
| 30 | MANAGER | 7698 | BLAKE | 2850.00 | grouped by deptno,job,empno,ename |
| 30 | MANAGER | | | 2850.00 | grouped by deptno,job |
| 30 | SALESMAN | 7499 | ALLEN | 1600.00 | grouped by deptno,job,empno,ename |
| 30 | SALESMAN | 7521 | WARD | 1250.00 | grouped by deptno,job,empno,ename |
| 30 | SALESMAN | 7654 | MARTIN | 1250.00 | grouped by deptno,job,empno,ename |
| 30 | SALESMAN | 7844 | TURNER | 1500.00 | grouped by deptno,job,empno,ename |
| 30 | SALESMAN | | | 5600.00 | grouped by deptno,job |
| 30 | | | | 9400.00 | grouped by deptno, grouping set 3 |
| 30 | | | | 9400.00 | grouped by deptno, grouping set 4 |
| | | | | 29025.00 | grouped by (), grouping set 5 |
| | | | | 29025.00 | grouped by (), grouping set 6 |
+--------+-----------+-------+--------+----------+-----------------------------------+
(31 rows)
!ok
# There are duplicate GROUPING SETS
select deptno, sum(sal) as s
from "scott".emp as t
group by grouping sets (deptno, deptno);
+--------+----------+
| DEPTNO | S |
+--------+----------+
| 10 | 8750.00 |
| 10 | 8750.00 |
| 20 | 10875.00 |
| 20 | 10875.00 |
| 30 | 9400.00 |
| 30 | 9400.00 |
+--------+----------+
(6 rows)
!ok
# Similar, not duplicate GROUPING SETS
select deptno, sum(sal) as s
from "scott".emp as t
group by grouping sets (deptno);
+--------+----------+
| DEPTNO | S |
+--------+----------+
| 10 | 8750.00 |
| 20 | 10875.00 |
| 30 | 9400.00 |
+--------+----------+
(3 rows)
!ok
# Complex GROUPING SETS clause that contains duplicates
select sum(sal) as s
from "scott".emp as t
group by job,
grouping sets ( deptno,
grouping sets ( (deptno, comm is null), comm is null),
(comm is null)),
();
+---------+
| S |
+---------+
| 1300.00 |
| 1300.00 |
| 2450.00 |
| 2450.00 |
| 2850.00 |
| 2850.00 |
| 2975.00 |
| 2975.00 |
| 5000.00 |
| 5000.00 |
| 5000.00 |
| 5000.00 |
| 6000.00 |
| 950.00 |
| 950.00 |
| 1900.00 |
| 1900.00 |
| 4150.00 |
| 4150.00 |
| 5600.00 |
| 5600.00 |
| 5600.00 |
| 5600.00 |
| 6000.00 |
| 6000.00 |
| 6000.00 |
| 8275.00 |
| 8275.00 |
+---------+
(28 rows)
!ok
# Equivalent query using flat GROUPING SETS
select sum(sal) as s
from "scott".emp
group by grouping sets ((job, deptno, comm is null),
(job, deptno), (job, comm is null), (job, comm is null));
+---------+
| S |
+---------+
| 1300.00 |
| 1300.00 |
| 2450.00 |
| 2450.00 |
| 2850.00 |
| 2850.00 |
| 2975.00 |
| 2975.00 |
| 5000.00 |
| 5000.00 |
| 5000.00 |
| 5000.00 |
| 6000.00 |
| 950.00 |
| 950.00 |
| 1900.00 |
| 1900.00 |
| 4150.00 |
| 4150.00 |
| 5600.00 |
| 5600.00 |
| 5600.00 |
| 5600.00 |
| 6000.00 |
| 6000.00 |
| 6000.00 |
| 8275.00 |
| 8275.00 |
+---------+
(28 rows)
!ok
# Equivalent query, but with GROUP_ID and GROUPING_ID
select sum(sal) as s,
grouping_id(job, deptno, comm is null) as g,
group_id() as i
from "scott".emp
group by grouping sets ((job, deptno, comm is null),
(job, deptno), (job, comm is null), (job, comm is null))
order by g, i, s desc;
+---------+---+---+
| S | G | I |
+---------+---+---+
| 6000.00 | 0 | 0 |
| 5600.00 | 0 | 0 |
| 5000.00 | 0 | 0 |
| 2975.00 | 0 | 0 |
| 2850.00 | 0 | 0 |
| 2450.00 | 0 | 0 |
| 1900.00 | 0 | 0 |
| 1300.00 | 0 | 0 |
| 950.00 | 0 | 0 |
| 8275.00 | 0 | 1 |
| 6000.00 | 0 | 1 |
| 5600.00 | 0 | 1 |
| 5000.00 | 0 | 1 |
| 4150.00 | 0 | 1 |
| 6000.00 | 1 | 0 |
| 5600.00 | 1 | 0 |
| 5000.00 | 1 | 0 |
| 2975.00 | 1 | 0 |
| 2850.00 | 1 | 0 |
| 2450.00 | 1 | 0 |
| 1900.00 | 1 | 0 |
| 1300.00 | 1 | 0 |
| 950.00 | 1 | 0 |
| 8275.00 | 2 | 0 |
| 6000.00 | 2 | 0 |
| 5600.00 | 2 | 0 |
| 5000.00 | 2 | 0 |
| 4150.00 | 2 | 0 |
+---------+---+---+
(28 rows)
!ok
# [KYLIN-751] Max on negative double values is not working
# [CALCITE-735] Primitive.DOUBLE.min should be large and negative
select max(v) as x, min(v) as n
from (values cast(-86.4 as double), cast(-100 as double)) as t(v);
+-------+--------+
| X | N |
+-------+--------+
| -86.4 | -100.0 |
+-------+--------+
(1 row)
!ok
select max(v) as x, min(v) as n
from (values cast(-86.4 as double), cast(-100 as double), cast(2 as double)) as t(v);
+-----+--------+
| X | N |
+-----+--------+
| 2.0 | -100.0 |
+-----+--------+
(1 row)
!ok
select max(v) as x, min(v) as n
from (values cast(-86.4 as float), cast(-100 as float)) as t(v);
+-------+--------+
| X | N |
+-------+--------+
| -86.4 | -100.0 |
+-------+--------+
(1 row)
!ok
# [CALCITE-551] Sub-query inside aggregate function
SELECT SUM(
CASE WHEN deptno IN (SELECT deptno FROM "scott".dept) THEN 1
ELSE 0 END) as s
FROM "scott".emp;
+----+
| S |
+----+
| 14 |
+----+
(1 row)
!ok
SELECT SUM((select min(cast(deptno as integer)) from "scott".dept)) as s
FROM "scott".emp;
+-----+
| S |
+-----+
| 140 |
+-----+
(1 row)
!ok
# As above, but with GROUP BY
SELECT SUM((select min(cast(deptno as integer)) from "scott".dept)) as s, deptno
FROM "scott".emp
GROUP BY deptno;
+----+--------+
| S | DEPTNO |
+----+--------+
| 30 | 10 |
| 50 | 20 |
| 60 | 30 |
+----+--------+
(3 rows)
!ok
# As above, but with correlation
SELECT SUM(
(select char_length(dname) from "scott".dept where dept.deptno = emp.empno)) as s
FROM "scott".emp;
+---+
| S |
+---+
| |
+---+
(1 row)
!ok
# FUSION rolled up using CARDINALITY
select cardinality(fusion(empnos)) as f_empnos_length
from (
select deptno, collect(empno) as empnos
from "scott".emp
group by deptno);
+-----------------+
| F_EMPNOS_LENGTH |
+-----------------+
| 14 |
+-----------------+
(1 row)
!ok
# FUSION
select cardinality(fusion(empnos)) as f_empnos_length from (select deptno, collect(empno) as empnos
from "scott".emp
group by deptno);
+-----------------+
| F_EMPNOS_LENGTH |
+-----------------+
| 14 |
+-----------------+
(1 row)
!ok
# FUSION on sub-total
select job, fusion(empnos) as empnos
from (
select job, collect(empno) as empnos
from "scott".emp
group by deptno, job)
group by job;
+-----------+--------------------------+
| JOB | EMPNOS |
+-----------+--------------------------+
| ANALYST | [7788, 7902] |
| CLERK | [7934, 7369, 7876, 7900] |
| MANAGER | [7782, 7566, 7698] |
| PRESIDENT | [7839] |
| SALESMAN | [7499, 7521, 7654, 7844] |
+-----------+--------------------------+
(5 rows)
!ok
# FUSION grand total
select fusion(deptnos) as deptnos
from (
select collect(distinct deptno) as deptnos
from "scott".emp
group by deptno);
+--------------+
| DEPTNOS |
+--------------+
| [20, 10, 30] |
+--------------+
(1 row)
!ok
# COLLECT
select deptno, collect(empno) as empnos
from "scott".emp
group by deptno;
+--------+--------------------------------------+
| DEPTNO | EMPNOS |
+--------+--------------------------------------+
| 10 | [7782, 7839, 7934] |
| 20 | [7369, 7566, 7788, 7876, 7902] |
| 30 | [7499, 7521, 7654, 7698, 7844, 7900] |
+--------+--------------------------------------+
(3 rows)
!ok
# COLLECT DISTINCT
# Disabled in JDK 1.7 because order of values is different
!if (jdk18) {
select deptno, collect(distinct job) as jobs
from "scott".emp
group by deptno;
+--------+-----------------------------+
| DEPTNO | JOBS |
+--------+-----------------------------+
| 10 | [MANAGER, CLERK, PRESIDENT] |
| 20 | [CLERK, ANALYST, MANAGER] |
| 30 | [SALESMAN, MANAGER, CLERK] |
+--------+-----------------------------+
(3 rows)
!ok
!}
# COLLECT ... FILTER
select deptno, collect(empno) filter (where empno < 7550) as empnos
from "scott".emp
group by deptno;
+--------+--------------+
| DEPTNO | EMPNOS |
+--------+--------------+
| 10 | [] |
| 20 | [7369] |
| 30 | [7499, 7521] |
+--------+--------------+
(3 rows)
!ok
# Aggregate FILTER
select deptno,
sum(sal) filter (where job = 'CLERK') c_sal,
sum(sal) filter (where job = 'CLERK' and deptno > 10) c10_sal,
max(sal) filter (where job = 'CLERK') as max_c,
min(sal) filter (where job = 'CLERK') as min_c,
max(sal) filter (where job = 'CLERK')
- min(sal) filter (where job = 'CLERK') as range_c,
max(sal) filter (where job = 'SALESMAN')
- min(sal) filter (where job = 'SALESMAN') as range_m
from "scott".emp
group by deptno;
+--------+---------+---------+---------+---------+---------+---------+
| DEPTNO | C_SAL | C10_SAL | MAX_C | MIN_C | RANGE_C | RANGE_M |
+--------+---------+---------+---------+---------+---------+---------+
| 10 | 1300.00 | | 1300.00 | 1300.00 | 0.00 | |
| 20 | 1900.00 | 1900.00 | 1100.00 | 800.00 | 300.00 | |
| 30 | 950.00 | 950.00 | 950.00 | 950.00 | 0.00 | 350.00 |
+--------+---------+---------+---------+---------+---------+---------+
(3 rows)
!ok
# Aggregate FILTER on condition in GROUP BY
select deptno,
sum(sal) filter (where deptno = 10) sal_10
from "scott".emp
group by deptno;
DEPTNO TINYINT(3)
SAL_10 DECIMAL(19, 2)
!type
+--------+---------+
| DEPTNO | SAL_10 |
+--------+---------+
| 10 | 8750.00 |
| 20 | |
| 30 | |
+--------+---------+
(3 rows)
!ok
# Aggregate FILTER with HAVING
select deptno
from "scott".emp
group by deptno
having sum(sal) filter (where job = 'CLERK') > 1000;
+--------+
| DEPTNO |
+--------+
| 10 |
| 20 |
+--------+
(2 rows)
!ok
# Aggregate FILTER with ORDER BY
select deptno
from "scott".emp
group by deptno
order by sum(sal) filter (where job = 'CLERK');
+--------+
| DEPTNO |
+--------+
| 30 |
| 10 |
| 20 |
+--------+
(3 rows)
!ok
# Aggregate FILTER with JOIN
select dept.deptno,
sum(sal) filter (where 1 < 2) as s,
sum(sal) as s1,
count(*) filter (where emp.ename < dept.dname) as c
from "scott".emp
join "scott".dept using (deptno)
group by dept.deptno;
+--------+----------+----------+---+
| DEPTNO | S | S1 | C |
+--------+----------+----------+---+
| 10 | 8750.00 | 8750.00 | 0 |
| 20 | 10875.00 | 10875.00 | 3 |
| 30 | 9400.00 | 9400.00 | 4 |
+--------+----------+----------+---+
(3 rows)
!ok
# Aggregate FILTER with DISTINCT
select deptno,
count(distinct job) as cdj
from "scott".emp
group by deptno;
+--------+-----+
| DEPTNO | CDJ |
+--------+-----+
| 10 | 3 |
| 20 | 3 |
| 30 | 3 |
+--------+-----+
(3 rows)
!ok
select deptno,
count(distinct job) filter (where job <> 'SALESMAN') as cdj
from "scott".emp
group by deptno;
+--------+-----+
| DEPTNO | CDJ |
+--------+-----+
| 10 | 3 |
| 20 | 3 |
| 30 | 2 |
+--------+-----+
(3 rows)
!ok
# Convert CASE to FILTER
select count(case x when 0 then null else -1 end) as c
from (values 0, null, 0, 1) as t(x);
+---+
| C |
+---+
| 2 |
+---+
(1 row)
!ok
# Same, expressed as FILTER
select count(*) filter (where (x = 0) is not true) as c
from (values 0, null, 0, 1) as t(x);
+---+
| C |
+---+
| 2 |
+---+
(1 row)
!ok
# Similar, not quite the same
select count(*) filter (where (x = 0) is false) as c
from (values 0, null, 0, 1) as t(x);
+---+
| C |
+---+
| 1 |
+---+
(1 row)
!ok
# Composite COUNT and FILTER
select count(*) as c,
count(*) filter (where z > 1) as cf,
count(x) as cx,
count(x) filter (where z > 1) as cxf,
count(x, y) as cxy,
count(x, y) filter (where z > 1) as cxyf
from (values (1, 1, 1), (2, 2, 2), (3, null, 3), (null, 4, 4)) as t(x, y, z);
+---+----+----+-----+-----+------+
| C | CF | CX | CXF | CXY | CXYF |
+---+----+----+-----+-----+------+
| 4 | 3 | 3 | 2 | 2 | 1 |
+---+----+----+-----+-----+------+
(1 row)
!ok
# [CALCITE-1293] Bad code generated when argument to COUNT(DISTINCT) is a
# GROUP BY column
select count(distinct deptno) as cd, count(*) as c
from "scott".emp
group by deptno;
+----+---+
| CD | C |
+----+---+
| 1 | 3 |
| 1 | 5 |
| 1 | 6 |
+----+---+
(3 rows)
!ok
select count(distinct deptno) as cd, count(*) as c
from "scott".emp
group by cube(deptno);
+----+---+
| CD | C |
+----+---+
| 1 | 3 |
| 1 | 5 |
| 1 | 6 |
| 3 | 3 |
+----+---+
(4 rows)
!ok
select deptno, count(distinct deptno) as c
from "scott".emp
group by deptno;
+--------+---+
| DEPTNO | C |
+--------+---+
| 10 | 1 |
| 20 | 1 |
| 30 | 1 |
+--------+---+
(3 rows)
!ok
select count(distinct deptno) as c
from "scott".emp
group by deptno;
+---+
| C |
+---+
| 1 |
| 1 |
| 1 |
+---+
(3 rows)
!ok
# Multiple distinct count
select deptno,
count(distinct job) as j, count(distinct mgr) as m
from "scott".emp
group by deptno;
+--------+---+---+
| DEPTNO | J | M |
+--------+---+---+
| 10 | 3 | 2 |
| 20 | 3 | 4 |
| 30 | 3 | 2 |
+--------+---+---+
(3 rows)
!ok
# Multiple distinct count and non-distinct aggregates
select deptno,
count(distinct job) as dj,
count(job) as j,
count(distinct mgr) as m,
sum(sal) as s
from "scott".emp
group by deptno;
+--------+----+---+---+----------+
| DEPTNO | DJ | J | M | S |
+--------+----+---+---+----------+
| 10 | 3 | 3 | 2 | 8750.00 |
| 20 | 3 | 5 | 4 | 10875.00 |
| 30 | 3 | 6 | 2 | 9400.00 |
+--------+----+---+---+----------+
(3 rows)
!ok
# Multiple distinct count and non-distinct aggregates, no GROUP BY
select count(distinct job) as dj,
count(job) as j,
count(distinct mgr) as m,
sum(sal) as s
from "scott".emp;
+----+----+---+----------+
| DJ | J | M | S |
+----+----+---+----------+
| 5 | 14 | 6 | 29025.00 |
+----+----+---+----------+
(1 row)
!ok
# [CALCITE-280] BigDecimal underflow
# Previously threw "java.lang.ArithmeticException: Non-terminating decimal
# expansion; no exact representable decimal result"
select avg(comm) as a, count(comm) as c from "scott".emp where empno < 7844;
+-------------------+---+
| A | C |
+-------------------+---+
| 733.3333333333333 | 3 |
+-------------------+---+
(1 row)
!ok
# [CALCITE-846] Push aggregate with FILTER through UNION ALL
select deptno, count(*) filter (where job = 'CLERK') as cf, count(*) as c
from (
select * from "scott".emp where deptno < 20
union all
select * from "scott".emp where deptno > 20)
group by deptno;
+--------+----+---+
| DEPTNO | CF | C |
+--------+----+---+
| 10 | 1 | 3 |
| 30 | 1 | 6 |
+--------+----+---+
(2 rows)
!ok
EnumerableAggregate(group=[{0}], CF=[COUNT() FILTER $1], C=[COUNT()])
EnumerableCalc(expr#0..1=[{inputs}], expr#2=['CLERK':VARCHAR(9)], expr#3=[=($t0, $t2)], expr#4=[IS TRUE($t3)], DEPTNO=[$t1], $f1=[$t4])
EnumerableUnion(all=[true])
EnumerableCalc(expr#0..7=[{inputs}], expr#8=[20], expr#9=[<($t7, $t8)], JOB=[$t2], DEPTNO=[$t7], $condition=[$t9])
EnumerableTableScan(table=[[scott, EMP]])
EnumerableCalc(expr#0..7=[{inputs}], expr#8=[20], expr#9=[>($t7, $t8)], JOB=[$t2], DEPTNO=[$t7], $condition=[$t9])
EnumerableTableScan(table=[[scott, EMP]])
!plan
# [CALCITE-751] Aggregate join transpose
select count(*)
from "scott".emp join "scott".dept using (deptno);
+--------+
| EXPR$0 |
+--------+
| 14 |
+--------+
(1 row)
!ok
EnumerableAggregate(group=[{}], EXPR$0=[COUNT()])
EnumerableHashJoin(condition=[=($0, $2)], joinType=[inner])
EnumerableCalc(expr#0..2=[{inputs}], DEPTNO=[$t0])
EnumerableTableScan(table=[[scott, DEPT]])
EnumerableCalc(expr#0..7=[{inputs}], EMPNO=[$t0], DEPTNO=[$t7])
EnumerableTableScan(table=[[scott, EMP]])
!plan
# Push sum: splits into sum * count
select sum(sal)
from "scott".emp join "scott".dept using (deptno);
+----------+
| EXPR$0 |
+----------+
| 29025.00 |
+----------+
(1 row)
!ok
EnumerableAggregate(group=[{}], EXPR$0=[SUM($1)])
EnumerableHashJoin(condition=[=($2, $3)], joinType=[semi])
EnumerableCalc(expr#0..7=[{inputs}], EMPNO=[$t0], SAL=[$t5], DEPTNO=[$t7])
EnumerableTableScan(table=[[scott, EMP]])
EnumerableCalc(expr#0..2=[{inputs}], DEPTNO=[$t0])
EnumerableTableScan(table=[[scott, DEPT]])
!plan
# Push sum; no aggregate needed after join
select sum(sal)
from "scott".emp join "scott".dept using (deptno)
group by emp.deptno, dept.deptno;
+----------+
| EXPR$0 |
+----------+
| 10875.00 |
| 8750.00 |
| 9400.00 |
+----------+
(3 rows)
!ok
EnumerableCalc(expr#0..2=[{inputs}], EXPR$0=[$t2])
EnumerableAggregate(group=[{0, 3}], EXPR$0=[SUM($2)])
EnumerableHashJoin(condition=[=($0, $3)], joinType=[inner])
EnumerableCalc(expr#0..2=[{inputs}], DEPTNO=[$t0])
EnumerableTableScan(table=[[scott, DEPT]])
EnumerableCalc(expr#0..7=[{inputs}], EMPNO=[$t0], SAL=[$t5], DEPTNO=[$t7])
EnumerableTableScan(table=[[scott, EMP]])
!plan
# Push sum; group by only one of the join keys
select sum(sal)
from "scott".emp join "scott".dept using (deptno)
group by emp.deptno;
+----------+
| EXPR$0 |
+----------+
| 10875.00 |
| 8750.00 |
| 9400.00 |
+----------+
(3 rows)
!ok
EnumerableCalc(expr#0..1=[{inputs}], EXPR$0=[$t1])
EnumerableAggregate(group=[{2}], EXPR$0=[SUM($1)])
EnumerableHashJoin(condition=[=($2, $3)], joinType=[semi])
EnumerableCalc(expr#0..7=[{inputs}], EMPNO=[$t0], SAL=[$t5], DEPTNO=[$t7])
EnumerableTableScan(table=[[scott, EMP]])
EnumerableCalc(expr#0..2=[{inputs}], DEPTNO=[$t0])
EnumerableTableScan(table=[[scott, DEPT]])
!plan
# Push min; Join-Aggregate is optimized to SemiJoin
select min(sal)
from "scott".emp join "scott".dept using (deptno)
group by emp.deptno;
+---------+
| EXPR$0 |
+---------+
| 1300.00 |
| 800.00 |
| 950.00 |
+---------+
(3 rows)
!ok
EnumerableCalc(expr#0..1=[{inputs}], EXPR$0=[$t1])
EnumerableAggregate(group=[{2}], EXPR$0=[MIN($1)])
EnumerableHashJoin(condition=[=($2, $3)], joinType=[semi])
EnumerableCalc(expr#0..7=[{inputs}], EMPNO=[$t0], SAL=[$t5], DEPTNO=[$t7])
EnumerableTableScan(table=[[scott, EMP]])
EnumerableCalc(expr#0..2=[{inputs}], DEPTNO=[$t0])
EnumerableTableScan(table=[[scott, DEPT]])
!plan
# Push sum and count
select count(*) as c, sum(sal) as s
from "scott".emp join "scott".dept using (deptno);
+----+----------+
| C | S |
+----+----------+
| 14 | 29025.00 |
+----+----------+
(1 row)
!ok
EnumerableAggregate(group=[{}], C=[COUNT()], S=[SUM($1)])
EnumerableHashJoin(condition=[=($2, $3)], joinType=[semi])
EnumerableCalc(expr#0..7=[{inputs}], EMPNO=[$t0], SAL=[$t5], DEPTNO=[$t7])
EnumerableTableScan(table=[[scott, EMP]])
EnumerableCalc(expr#0..2=[{inputs}], DEPTNO=[$t0])
EnumerableTableScan(table=[[scott, DEPT]])
!plan
# Push sum and count, group by join key
select count(*) as c, sum(sal) as s
from "scott".emp join "scott".dept using (deptno) group by emp.deptno;
+---+----------+
| C | S |
+---+----------+
| 3 | 8750.00 |
| 5 | 10875.00 |
| 6 | 9400.00 |
+---+----------+
(3 rows)
!ok
# No aggregate on top, because output of join is unique
EnumerableCalc(expr#0..2=[{inputs}], C=[$t1], S=[$t2])
EnumerableAggregate(group=[{2}], C=[COUNT()], S=[SUM($1)])
EnumerableHashJoin(condition=[=($2, $3)], joinType=[semi])
EnumerableCalc(expr#0..7=[{inputs}], EMPNO=[$t0], SAL=[$t5], DEPTNO=[$t7])
EnumerableTableScan(table=[[scott, EMP]])
EnumerableCalc(expr#0..2=[{inputs}], DEPTNO=[$t0])
EnumerableTableScan(table=[[scott, DEPT]])
!plan
# Push sum and count, group by join key plus another column
select count(*) as c, sum(sal) as s
from "scott".emp join "scott".dept using (deptno) group by emp.job, dept.deptno;
+---+---------+
| C | S |
+---+---------+
| 1 | 1300.00 |
| 1 | 2450.00 |
| 1 | 2850.00 |
| 1 | 2975.00 |
| 1 | 5000.00 |
| 1 | 950.00 |
| 2 | 1900.00 |
| 2 | 6000.00 |
| 4 | 5600.00 |
+---+---------+
(9 rows)
!ok
EnumerableCalc(expr#0..3=[{inputs}], C=[$t2], S=[$t3])
EnumerableAggregate(group=[{0, 2}], C=[COUNT()], S=[SUM($3)])
EnumerableHashJoin(condition=[=($0, $4)], joinType=[inner])
EnumerableCalc(expr#0..2=[{inputs}], DEPTNO=[$t0])
EnumerableTableScan(table=[[scott, DEPT]])
EnumerableCalc(expr#0..7=[{inputs}], EMPNO=[$t0], JOB=[$t2], SAL=[$t5], DEPTNO=[$t7])
EnumerableTableScan(table=[[scott, EMP]])
!plan
# Push sum and count, group by non-join column
select count(*) as c, sum(sal) as s
from "scott".emp join "scott".dept using (deptno) group by emp.job;
+---+---------+
| C | S |
+---+---------+
| 1 | 5000.00 |
| 2 | 6000.00 |
| 3 | 8275.00 |
| 4 | 4150.00 |
| 4 | 5600.00 |
+---+---------+
(5 rows)
!ok
EnumerableCalc(expr#0..2=[{inputs}], C=[$t1], S=[$t2])
EnumerableAggregate(group=[{1}], C=[COUNT()], S=[SUM($2)])
EnumerableHashJoin(condition=[=($3, $4)], joinType=[semi])
EnumerableCalc(expr#0..7=[{inputs}], EMPNO=[$t0], JOB=[$t2], SAL=[$t5], DEPTNO=[$t7])
EnumerableTableScan(table=[[scott, EMP]])
EnumerableCalc(expr#0..2=[{inputs}], DEPTNO=[$t0])
EnumerableTableScan(table=[[scott, DEPT]])
!plan
# Push count and sum, group by superset of join key
select count(*) as c, sum(sal) as s
from "scott".emp join "scott".dept using (deptno) group by emp.job, dept.deptno;
+---+---------+
| C | S |
+---+---------+
| 1 | 5000.00 |
| 2 | 6000.00 |
| 4 | 5600.00 |
| 1 | 1300.00 |
| 1 | 2450.00 |
| 1 | 2850.00 |
| 1 | 2975.00 |
| 1 | 950.00 |
| 2 | 1900.00 |
+---+---------+
(9 rows)
!ok
EnumerableCalc(expr#0..3=[{inputs}], C=[$t2], S=[$t3])
EnumerableAggregate(group=[{0, 2}], C=[COUNT()], S=[SUM($3)])
EnumerableHashJoin(condition=[=($0, $4)], joinType=[inner])
EnumerableCalc(expr#0..2=[{inputs}], DEPTNO=[$t0])
EnumerableTableScan(table=[[scott, DEPT]])
EnumerableCalc(expr#0..7=[{inputs}], EMPNO=[$t0], JOB=[$t2], SAL=[$t5], DEPTNO=[$t7])
EnumerableTableScan(table=[[scott, EMP]])
!plan
# Push count and sum, group by a column being aggregated
select count(*) as c, sum(sal) as s
from "scott".emp join "scott".dept using (deptno) group by emp.sal;
+---+---------+
| C | S |
+---+---------+
| 1 | 5000.00 |
| 2 | 6000.00 |
| 1 | 1100.00 |
| 1 | 1300.00 |
| 1 | 1500.00 |
| 1 | 1600.00 |
| 1 | 2450.00 |
| 1 | 2850.00 |
| 1 | 2975.00 |
| 1 | 800.00 |
| 1 | 950.00 |
| 2 | 2500.00 |
+---+---------+
(12 rows)
!ok
EnumerableCalc(expr#0..2=[{inputs}], C=[$t1], S=[$t2])
EnumerableAggregate(group=[{1}], C=[COUNT()], S=[SUM($1)])
EnumerableHashJoin(condition=[=($2, $3)], joinType=[semi])
EnumerableCalc(expr#0..7=[{inputs}], EMPNO=[$t0], SAL=[$t5], DEPTNO=[$t7])
EnumerableTableScan(table=[[scott, EMP]])
EnumerableCalc(expr#0..2=[{inputs}], DEPTNO=[$t0])
EnumerableTableScan(table=[[scott, DEPT]])
!plan
# Push sum, self-join, returning one row with a null value
select sum(e.sal) as s
from "scott".emp e join "scott".emp m on e.mgr = e.empno;
+---+
| S |
+---+
| |
+---+
(1 row)
!ok
# Push sum, self-join
select sum(e.sal) as s
from "scott".emp e join "scott".emp m on e.mgr = m.empno;
+----------+
| S |
+----------+
| 24025.00 |
+----------+
(1 row)
!ok
# Push sum, self-join, cartesian product over nullable and non-nullable columns
select sum(e.sal) as ss, count(e.sal) as cs, count(e.mgr) as cm
from "scott".emp e
join "scott".emp m on e.deptno = m.deptno
group by e.deptno, m.deptno;
+----------+----+----+
| SS | CS | CM |
+----------+----+----+
| 26250.00 | 9 | 6 |
| 54375.00 | 25 | 25 |
| 56400.00 | 36 | 36 |
+----------+----+----+
(3 rows)
!ok
# Push sum, self-join, aggregate by column on "many" side
select sum(e.sal) as s
from "scott".emp e join "scott".emp m on e.mgr = m.empno
group by m.empno;
+---------+
| S |
+---------+
| 1100.00 |
| 1300.00 |
| 6000.00 |
| 6550.00 |
| 800.00 |
| 8275.00 |
+---------+
(6 rows)
!ok
# Push sum, self-join, aggregate by column on "one" side.
# Note inflated totals due to cartesian product.
select sum(m.sal) as s
from "scott".emp e join "scott".emp m on e.mgr = m.empno
group by m.empno;
+----------+
| S |
+----------+
| 14250.00 |
| 15000.00 |
| 2450.00 |
| 3000.00 |
| 3000.00 |
| 5950.00 |
+----------+
(6 rows)
!ok
# Collation of LogicalAggregate ([CALCITE-783] and [CALCITE-822])
select sum(x) as sum_cnt,
count(distinct y) as cnt_dist
from
(
select
count(*) as x,
t1.job as y,
t1.deptno as z
from
"scott".emp t1
group by t1.job, t1.deptno
order by t1.job, t1.deptno
) sq(x,y,z)
group by z
order by sum_cnt;
+---------+----------+
| SUM_CNT | CNT_DIST |
+---------+----------+
| 3 | 3 |
| 5 | 3 |
| 6 | 3 |
+---------+----------+
(3 rows)
!ok
# [CALCITE-938] Aggregate row count
select empno, d.deptno
from "scott".emp
join (select distinct deptno from "scott".dept) d
using (deptno);
+-------+--------+
| EMPNO | DEPTNO |
+-------+--------+
| 7369 | 20 |
| 7499 | 30 |
| 7521 | 30 |
| 7566 | 20 |
| 7654 | 30 |
| 7698 | 30 |
| 7782 | 10 |
| 7788 | 20 |
| 7839 | 10 |
| 7844 | 30 |
| 7876 | 20 |
| 7900 | 30 |
| 7902 | 20 |
| 7934 | 10 |
+-------+--------+
(14 rows)
!ok
EnumerableCalc(expr#0..2=[{inputs}], EMPNO=[$t1], DEPTNO=[$t0])
EnumerableHashJoin(condition=[=($0, $2)], joinType=[inner])
EnumerableCalc(expr#0..2=[{inputs}], DEPTNO=[$t0])
EnumerableTableScan(table=[[scott, DEPT]])
EnumerableCalc(expr#0..7=[{inputs}], EMPNO=[$t0], DEPTNO=[$t7])
EnumerableTableScan(table=[[scott, EMP]])
!plan
# [CALCITE-1016] "GROUP BY constant" on empty relation should return 0 rows
# Should return 0 rows
select '1' from "scott".emp where false group by 1;
+--------+
| EXPR$0 |
+--------+
+--------+
(0 rows)
!ok
# Should return 0 rows
select count('1') from "scott".emp where false group by 1;
+--------+
| EXPR$0 |
+--------+
+--------+
(0 rows)
!ok
# Should return 1 row
select count('1') from "scott".emp where false group by ();
+--------+
| EXPR$0 |
+--------+
| 0 |
+--------+
(1 row)
!ok
# Should return 1 row
select count('1') from "scott".emp where false;
+--------+
| EXPR$0 |
+--------+
| 0 |
+--------+
(1 row)
!ok
# As above, but on VALUES rather than table
# Should return 0 rows
select '1' from (values (1, 2), (3, 4)) where false group by 1;
+--------+
| EXPR$0 |
+--------+
+--------+
(0 rows)
!ok
# Should return 0 rows
select count('1') from (values (1, 2), (3, 4)) where false group by 1;
+--------+
| EXPR$0 |
+--------+
+--------+
(0 rows)
!ok
# Should return 1 row
select count('1') from (values (1, 2), (3, 4)) where false group by ();
+--------+
| EXPR$0 |
+--------+
| 0 |
+--------+
(1 row)
!ok
# Should return 1 row
select count('1') from (values (1, 2), (3, 4)) where false;
+--------+
| EXPR$0 |
+--------+
| 0 |
+--------+
(1 row)
!ok
# As above, but on join
# Should return 0 rows
select '1' from "scott".emp join "scott".dept using (deptno) where false group by 1;
+--------+
| EXPR$0 |
+--------+
+--------+
(0 rows)
!ok
# Should return 0 rows
select count('1') from "scott".emp join "scott".dept using (deptno) where false group by 1;
+--------+
| EXPR$0 |
+--------+
+--------+
(0 rows)
!ok
# Should return 1 row
select count('1') from "scott".emp join "scott".dept using (deptno) where false group by ();
+--------+
| EXPR$0 |
+--------+
| 0 |
+--------+
(1 row)
!ok
# Should return 1 row
select count('1') from "scott".emp join "scott".dept using (deptno) where false;
+--------+
| EXPR$0 |
+--------+
| 0 |
+--------+
(1 row)
!ok
# [CALCITE-1023] Planner rule that removes Aggregate keys that are constant
select job, sum(sal) as sum_sal, deptno
from "scott".emp
where deptno = 10
group by deptno, job;
+-----------+---------+--------+
| JOB | SUM_SAL | DEPTNO |
+-----------+---------+--------+
| CLERK | 1300.00 | 10 |
| MANAGER | 2450.00 | 10 |
| PRESIDENT | 5000.00 | 10 |
+-----------+---------+--------+
(3 rows)
!ok
!if (fixed.calcite1048) {
select job, sum(sal) as sum_sal, deptno
from "scott".emp
where deptno = 10
group by deptno, job;
EnumerableCalc(expr#0..2=[{inputs}], JOB=[$t0], SUM_SAL=[$t2], DEPTNO=[$t1])
EnumerableAggregate(group=[{2, 7}], SUM_SAL=[SUM($5)])
EnumerableCalc(expr#0..7=[{inputs}], expr#8=[CAST($t7):INTEGER], expr#9=[10], expr#10=[=($t8, $t9)], proj#0..7=[{exprs}], $condition=[$t10])
EnumerableTableScan(table=[[scott, EMP]])
!plan
!}
# Aggregate query that uses no columns throws AssertionError in
# RelFieldTrimmer.trimFields
select 2 as two
from emp
group by ();
+-----+
| TWO |
+-----+
| 2 |
+-----+
(1 row)
!ok
# As previous, as a scalar sub-query
select deptno,
(select 2 as two from emp group by ()) as two
from emp
group by deptno;
+--------+-----+
| DEPTNO | TWO |
+--------+-----+
| 10 | 2 |
| 20 | 2 |
| 30 | 2 |
+--------+-----+
(3 rows)
!ok
# As previous, grand total
select (select 2 from emp group by ()) as two
from emp
group by ();
+-----+
| TWO |
+-----+
| 2 |
+-----+
(1 row)
!ok
!use orinoco
# FLOOR to achieve a 2-hour window
select floor(rowtime to hour) as rowtime, count(*) as c
from Orders
group by floor(rowtime to hour);
+---------------------+---+
| ROWTIME | C |
+---------------------+---+
| 2015-02-15 10:00:00 | 4 |
| 2015-02-15 11:00:00 | 1 |
+---------------------+---+
(2 rows)
!ok
# FLOOR applied to intervals, to achieve a 2-hour window
select rowtime, count(*) as c
from (
select timestamp '1970-1-1 0:0:0' + (floor(timestamp '1970-1-1 0:0:0' + ((rowtime - timestamp '1970-1-1 0:0:0') second) / 2 to hour) - timestamp '1970-1-1 0:0:0') second * 2 as rowtime
from Orders)
group by rowtime;
+---------------------+---+
| ROWTIME | C |
+---------------------+---+
| 2015-02-15 10:00:00 | 5 |
+---------------------+---+
(1 row)
!ok
# [CALCITE-729] IndexOutOfBoundsException in ROLLUP query on JDBC data source
!use jdbc_scott
select deptno, job, count(*) as c
from jdbc_scott.emp
group by rollup (deptno, job)
order by 1, 2;
+--------+-----------+----+
| DEPTNO | JOB | C |
+--------+-----------+----+
| 10 | CLERK | 1 |
| 10 | MANAGER | 1 |
| 10 | PRESIDENT | 1 |
| 10 | | 3 |
| 20 | ANALYST | 2 |
| 20 | CLERK | 2 |
| 20 | MANAGER | 1 |
| 20 | | 5 |
| 30 | CLERK | 1 |
| 30 | MANAGER | 1 |
| 30 | SALESMAN | 4 |
| 30 | | 6 |
| | | 14 |
+--------+-----------+----+
(13 rows)
!ok
# [CALCITE-799] Incorrect result for "HAVING count(*) > 1"
select d.deptno, min(e.empid) as empid
from (values (100, 'Bill', 1),
(200, 'Eric', 1),
(150, 'Sebastian', 3)) as e(empid, name, deptno)
join (values (1, 'LeaderShip'),
(2, 'TestGroup'),
(3, 'Development')) as d(deptno, name)
on e.deptno = d.deptno
group by d.deptno
having count(*) > 1;
+--------+-------+
| DEPTNO | EMPID |
+--------+-------+
| 1 | 100 |
+--------+-------+
(1 row)
!ok
# Same, using USING (combining [CALCITE-799] and [CALCITE-801])
select d.deptno, min(e.empid) as empid
from (values (100, 'Bill', 1),
(200, 'Eric', 1),
(150, 'Sebastian', 3)) as e(empid, name, deptno)
join (values (1, 'LeaderShip'),
(2, 'TestGroup'),
(3, 'Development')) as d(deptno, name)
using (deptno)
group by d.deptno
having count(*) > 1;
+--------+-------+
| DEPTNO | EMPID |
+--------+-------+
| 1 | 100 |
+--------+-------+
(1 row)
!ok
# [CALCITE-886] System functions in the GROUP BY clause
# Calls to system functions do not have "()", which may confuse the validator.
select CURRENT_USER as CUSER
from jdbc_scott.emp
group by CURRENT_USER;
+-------+
| CUSER |
+-------+
| SCOTT |
+-------+
(1 row)
!ok
# [CALCITE-886] System functions in the GROUP BY clause
# System function inside a GROUPING SETS.
select CURRENT_USER as CUSER
from jdbc_scott.emp
group by grouping sets(CURRENT_USER);
+-------+
| CUSER |
+-------+
| SCOTT |
+-------+
(1 row)
!ok
# [CALCITE-1381] SqlCall.clone should retain function quantifier
select nullif(count(distinct '1'),0)
from (values (1,2),(3,4));
+--------+
| EXPR$0 |
+--------+
| 1 |
+--------+
(1 row)
!ok
EnumerableCalc(expr#0=[{inputs}], expr#1=[0:BIGINT], expr#2=[=($t0, $t1)], expr#3=[null:BIGINT], expr#4=[CASE($t2, $t3, $t0)], EXPR$0=[$t4])
EnumerableAggregate(group=[{}], agg#0=[COUNT($0)])
EnumerableAggregate(group=[{0}])
EnumerableValues(tuples=[[{ '1' }, { '1' }]])
!plan
!use scott
# [CALCITE-4345] SUM(CASE WHEN b THEN 1) etc.
select
sum(sal) as sum_sal,
count(distinct case
when job = 'CLERK'
then deptno else null end) as count_distinct_clerk,
sum(case when deptno = 10 then sal end) as sum_sal_d10,
sum(case when deptno = 20 then sal else 0 end) as sum_sal_d20,
sum(case when deptno = 30 then 1 else 0 end) as count_d30,
count(case when deptno = 40 then 'x' end) as count_d40,
sum(case when deptno = 45 then 1 end) as count_d45,
sum(case when deptno = 50 then 1 else null end) as count_d50,
sum(case when deptno = 60 then null end) as sum_null_d60,
sum(case when deptno = 70 then null else 1 end) as sum_null_d70,
count(case when deptno = 20 then 1 end) as count_d20
from emp;
+----------+----------------------+-------------+-------------+-----------+-----------+-----------+-----------+--------------+--------------+-----------+
| SUM_SAL | COUNT_DISTINCT_CLERK | SUM_SAL_D10 | SUM_SAL_D20 | COUNT_D30 | COUNT_D40 | COUNT_D45 | COUNT_D50 | SUM_NULL_D60 | SUM_NULL_D70 | COUNT_D20 |
+----------+----------------------+-------------+-------------+-----------+-----------+-----------+-----------+--------------+--------------+-----------+
| 29025.00 | 3 | 8750.00 | 10875.00 | 6 | 0 | | | | 14 | 5 |
+----------+----------------------+-------------+-------------+-----------+-----------+-----------+-----------+--------------+--------------+-----------+
(1 row)
!ok
# Check that SUM produces NULL on empty set, COUNT produces 0.
select
sum(sal) as sum_sal,
count(distinct case
when job = 'CLERK'
then deptno else null end) as count_distinct_clerk,
sum(case when deptno = 10 then sal end) as sum_sal_d10,
sum(case when deptno = 20 then sal else 0 end) as sum_sal_d20,
sum(case when deptno = 30 then 1 else 0 end) as count_d30,
count(case when deptno = 40 then 'x' end) as count_d40,
sum(case when deptno = 45 then 1 end) as count_d45,
sum(case when deptno = 50 then 1 else null end) as count_d50,
sum(case when deptno = 60 then null end) as sum_null_d60,
sum(case when deptno = 70 then null else 1 end) as sum_null_d70,
count(case when deptno = 20 then 1 end) as count_d20
from emp
where false;
+---------+----------------------+-------------+-------------+-----------+-----------+-----------+-----------+--------------+--------------+-----------+
| SUM_SAL | COUNT_DISTINCT_CLERK | SUM_SAL_D10 | SUM_SAL_D20 | COUNT_D30 | COUNT_D40 | COUNT_D45 | COUNT_D50 | SUM_NULL_D60 | SUM_NULL_D70 | COUNT_D20 |
+---------+----------------------+-------------+-------------+-----------+-----------+-----------+-----------+--------------+--------------+-----------+
| | 0 | | | | 0 | | | | | 0 |
+---------+----------------------+-------------+-------------+-----------+-----------+-----------+-----------+--------------+--------------+-----------+
(1 row)
!ok
# [CALCITE-4609] AggregateRemoveRule throws while handling AVG
# Note that the outer GROUP BY is a no-op, and therefore
# AggregateRemoveRule kicks in.
SELECT job, AVG(avg_sal) AS avg_sal2
FROM (
SELECT deptno, job, AVG(sal) AS avg_sal
FROM "scott".emp
GROUP BY deptno, job) AS EmpAnalytics
WHERE deptno = 30
GROUP BY job;
+----------+----------+
| JOB | AVG_SAL2 |
+----------+----------+
| CLERK | 950.00 |
| MANAGER | 2850.00 |
| SALESMAN | 1400.00 |
+----------+----------+
(3 rows)
!ok
# Same, using WITH
WITH EmpAnalytics AS (
SELECT deptno, job, AVG(sal) AS avg_sal
FROM "scott".emp
GROUP BY deptno, job)
SELECT job, AVG(avg_sal) AS avg_sal2
FROM EmpAnalytics
WHERE deptno = 30
GROUP BY job;
+----------+----------+
| JOB | AVG_SAL2 |
+----------+----------+
| CLERK | 950.00 |
| MANAGER | 2850.00 |
| SALESMAN | 1400.00 |
+----------+----------+
(3 rows)
!ok
# [CALCITE-1930] AggregateExpandDistinctAggregateRules should handle multiple aggregate calls with same input ref
select count(distinct EMPNO), COUNT(SAL), MIN(SAL), MAX(SAL) from "scott".emp;
+--------+--------+--------+---------+
| EXPR$0 | EXPR$1 | EXPR$2 | EXPR$3 |
+--------+--------+--------+---------+
| 14 | 14 | 800.00 | 5000.00 |
+--------+--------+--------+---------+
(1 row)
!ok
EnumerableCalc(expr#0..3=[{inputs}], expr#4=[CAST($t1):BIGINT NOT NULL], EXPR$0=[$t0], EXPR$1=[$t4], EXPR$2=[$t2], EXPR$3=[$t3])
EnumerableAggregate(group=[{}], EXPR$0=[COUNT($0) FILTER $4], EXPR$1=[MIN($1) FILTER $5], EXPR$2=[MIN($2) FILTER $5], EXPR$3=[MIN($3) FILTER $5])
EnumerableCalc(expr#0..4=[{inputs}], expr#5=[0], expr#6=[=($t4, $t5)], expr#7=[1], expr#8=[=($t4, $t7)], proj#0..3=[{exprs}], $g_0=[$t6], $g_1=[$t8])
EnumerableAggregate(group=[{0}], groups=[[{0}, {}]], EXPR$1=[COUNT($5)], EXPR$2=[MIN($5)], EXPR$3=[MAX($5)], $g=[GROUPING($0)])
EnumerableTableScan(table=[[scott, EMP]])
!plan
# [CALCITE-1930] AggregateExpandDistinctAggregateRules should handle multiple aggregate calls with same input ref
select count(distinct DEPTNO), COUNT(JOB), MIN(SAL), MAX(SAL) from "scott".emp;
+--------+--------+--------+---------+
| EXPR$0 | EXPR$1 | EXPR$2 | EXPR$3 |
+--------+--------+--------+---------+
| 3 | 14 | 800.00 | 5000.00 |
+--------+--------+--------+---------+
(1 row)
!ok
EnumerableCalc(expr#0..3=[{inputs}], expr#4=[CAST($t1):BIGINT NOT NULL], EXPR$0=[$t0], EXPR$1=[$t4], EXPR$2=[$t2], EXPR$3=[$t3])
EnumerableAggregate(group=[{}], EXPR$0=[COUNT($0) FILTER $4], EXPR$1=[MIN($1) FILTER $5], EXPR$2=[MIN($2) FILTER $5], EXPR$3=[MIN($3) FILTER $5])
EnumerableCalc(expr#0..4=[{inputs}], expr#5=[0], expr#6=[=($t4, $t5)], expr#7=[1], expr#8=[=($t4, $t7)], proj#0..3=[{exprs}], $g_0=[$t6], $g_1=[$t8])
EnumerableAggregate(group=[{7}], groups=[[{7}, {}]], EXPR$1=[COUNT($2)], EXPR$2=[MIN($5)], EXPR$3=[MAX($5)], $g=[GROUPING($7)])
EnumerableTableScan(table=[[scott, EMP]])
!plan
# [CALCITE-1930] AggregateExpandDistinctAggregateRules should handle multiple aggregate calls with same input ref
select MGR, count(distinct DEPTNO), COUNT(JOB), MIN(SAL), MAX(SAL) from "scott".emp group by MGR;
+------+--------+--------+---------+---------+
| MGR | EXPR$1 | EXPR$2 | EXPR$3 | EXPR$4 |
+------+--------+--------+---------+---------+
| 7566 | 1 | 2 | 3000.00 | 3000.00 |
| 7698 | 1 | 5 | 950.00 | 1600.00 |
| 7782 | 1 | 1 | 1300.00 | 1300.00 |
| 7788 | 1 | 1 | 1100.00 | 1100.00 |
| 7839 | 3 | 3 | 2450.00 | 2975.00 |
| 7902 | 1 | 1 | 800.00 | 800.00 |
| | 1 | 1 | 5000.00 | 5000.00 |
+------+--------+--------+---------+---------+
(7 rows)
!ok
EnumerableCalc(expr#0..4=[{inputs}], expr#5=[CAST($t2):BIGINT NOT NULL], proj#0..1=[{exprs}], EXPR$2=[$t5], EXPR$3=[$t3], EXPR$4=[$t4])
EnumerableAggregate(group=[{0}], EXPR$1=[COUNT($1) FILTER $5], EXPR$2=[MIN($2) FILTER $6], EXPR$3=[MIN($3) FILTER $6], EXPR$4=[MIN($4) FILTER $6])
EnumerableCalc(expr#0..5=[{inputs}], expr#6=[0], expr#7=[=($t5, $t6)], expr#8=[1], expr#9=[=($t5, $t8)], proj#0..4=[{exprs}], $g_0=[$t7], $g_1=[$t9])
EnumerableAggregate(group=[{3, 7}], groups=[[{3, 7}, {3}]], EXPR$2=[COUNT($2)], EXPR$3=[MIN($5)], EXPR$4=[MAX($5)], $g=[GROUPING($3, $7)])
EnumerableTableScan(table=[[scott, EMP]])
!plan
# [CALCITE-1930] AggregateExpandDistinctAggregateRules should handle multiple aggregate calls with same input ref
select MGR, count(distinct DEPTNO, JOB), MIN(SAL), MAX(SAL) from "scott".emp group by MGR;
+------+--------+---------+---------+
| MGR | EXPR$1 | EXPR$2 | EXPR$3 |
+------+--------+---------+---------+
| 7566 | 1 | 3000.00 | 3000.00 |
| 7698 | 2 | 950.00 | 1600.00 |
| 7782 | 1 | 1300.00 | 1300.00 |
| 7788 | 1 | 1100.00 | 1100.00 |
| 7839 | 3 | 2450.00 | 2975.00 |
| 7902 | 1 | 800.00 | 800.00 |
| | 1 | 5000.00 | 5000.00 |
+------+--------+---------+---------+
(7 rows)
!ok
EnumerableAggregate(group=[{0}], EXPR$1=[COUNT($1, $2) FILTER $5], EXPR$2=[MIN($3) FILTER $6], EXPR$3=[MIN($4) FILTER $6])
EnumerableCalc(expr#0..5=[{inputs}], expr#6=[0], expr#7=[=($t5, $t6)], expr#8=[3], expr#9=[=($t5, $t8)], MGR=[$t1], DEPTNO=[$t2], JOB=[$t0], EXPR$2=[$t3], EXPR$3=[$t4], $g_0=[$t7], $g_3=[$t9])
EnumerableAggregate(group=[{2, 3, 7}], groups=[[{2, 3, 7}, {3}]], EXPR$2=[MIN($5)], EXPR$3=[MAX($5)], $g=[GROUPING($3, $7, $2)])
EnumerableTableScan(table=[[scott, EMP]])
!plan
# [CALCITE-2366] Add support for ANY_VALUE function
# Without GROUP BY clause
SELECT any_value(empno) as anyempno from "scott".emp;
+----------+
| ANYEMPNO |
+----------+
| 7934 |
+----------+
(1 row)
!ok
EnumerableAggregate(group=[{}], ANYEMPNO=[ANY_VALUE($0)])
EnumerableTableScan(table=[[scott, EMP]])
!plan
# [CALCITE-2366] Add support for ANY_VALUE function
# With GROUP BY clause
SELECT any_value(empno) as anyempno from "scott".emp group by sal;
+----------+
| ANYEMPNO |
+----------+
| 7369 |
| 7499 |
| 7566 |
| 7654 |
| 7698 |
| 7782 |
| 7839 |
| 7844 |
| 7876 |
| 7900 |
| 7902 |
| 7934 |
+----------+
(12 rows)
!ok
EnumerableCalc(expr#0..1=[{inputs}], ANYEMPNO=[$t1])
EnumerableAggregate(group=[{5}], ANYEMPNO=[ANY_VALUE($0)])
EnumerableTableScan(table=[[scott, EMP]])
!plan
# [CALCITE-1776, CALCITE-2402] REGR_COUNT
SELECT regr_count(COMM, SAL) as "REGR_COUNT(COMM, SAL)",
regr_count(EMPNO, SAL) as "REGR_COUNT(EMPNO, SAL)"
from "scott".emp;
+-----------------------+------------------------+
| REGR_COUNT(COMM, SAL) | REGR_COUNT(EMPNO, SAL) |
+-----------------------+------------------------+
| 4 | 14 |
+-----------------------+------------------------+
(1 row)
!ok
EnumerableAggregate(group=[{}], REGR_COUNT(COMM, SAL)=[REGR_COUNT($6, $5)], REGR_COUNT(EMPNO, SAL)=[REGR_COUNT($5)])
EnumerableTableScan(table=[[scott, EMP]])
!plan
# [CALCITE-1776, CALCITE-2402] REGR_SXX, REGR_SXY, REGR_SYY
SELECT
regr_sxx(COMM, SAL) as "REGR_SXX(COMM, SAL)",
regr_syy(COMM, SAL) as "REGR_SYY(COMM, SAL)",
regr_sxx(SAL, COMM) as "REGR_SXX(SAL, COMM)",
regr_syy(SAL, COMM) as "REGR_SYY(SAL, COMM)"
from "scott".emp;
+---------------------+---------------------+---------------------+---------------------+
| REGR_SXX(COMM, SAL) | REGR_SYY(COMM, SAL) | REGR_SXX(SAL, COMM) | REGR_SYY(SAL, COMM) |
+---------------------+---------------------+---------------------+---------------------+
| 95000.0000 | 1090000.0000 | 1090000.0000 | 95000.0000 |
+---------------------+---------------------+---------------------+---------------------+
(1 row)
!ok
# [CALCITE-1776, CALCITE-2402] COVAR_POP, COVAR_SAMP, VAR_SAMP, VAR_POP
SELECT
covar_pop(COMM, COMM) as "COVAR_POP(COMM, COMM)",
covar_samp(SAL, SAL) as "COVAR_SAMP(SAL, SAL)",
var_pop(COMM) as "VAR_POP(COMM)",
var_samp(SAL) as "VAR_SAMP(SAL)"
from "scott".emp;
+-----------------------+----------------------+---------------+-------------------+
| COVAR_POP(COMM, COMM) | COVAR_SAMP(SAL, SAL) | VAR_POP(COMM) | VAR_SAMP(SAL) |
+-----------------------+----------------------+---------------+-------------------+
| 272500.0000 | 1398313.873626374 | 272500.0000 | 1398313.873626374 |
+-----------------------+----------------------+---------------+-------------------+
(1 row)
!ok
# [CALCITE-1776, CALCITE-2402] REGR_COUNT with group by
SELECT SAL, regr_count(COMM, SAL) as "REGR_COUNT(COMM, SAL)",
regr_count(EMPNO, SAL) as "REGR_COUNT(EMPNO, SAL)"
from "scott".emp group by SAL;
+---------+-----------------------+------------------------+
| SAL | REGR_COUNT(COMM, SAL) | REGR_COUNT(EMPNO, SAL) |
+---------+-----------------------+------------------------+
| 1100.00 | 0 | 1 |
| 1250.00 | 2 | 2 |
| 1300.00 | 0 | 1 |
| 1500.00 | 1 | 1 |
| 1600.00 | 1 | 1 |
| 2450.00 | 0 | 1 |
| 2850.00 | 0 | 1 |
| 2975.00 | 0 | 1 |
| 3000.00 | 0 | 2 |
| 5000.00 | 0 | 1 |
| 800.00 | 0 | 1 |
| 950.00 | 0 | 1 |
+---------+-----------------------+------------------------+
(12 rows)
!ok
# [CALCITE-1776, CALCITE-2402] COVAR_POP, COVAR_SAMP, VAR_SAMP, VAR_POP with group by
SELECT
MONTH(HIREDATE) as "MONTH",
covar_samp(SAL, COMM) as "COVAR_SAMP(SAL, COMM)",
var_pop(COMM) as "VAR_POP(COMM)",
var_samp(SAL) as "VAR_SAMP(SAL)"
from "scott".emp
group by MONTH(HIREDATE);
+-------+-----------------------+---------------+-------------------+
| MONTH | COVAR_SAMP(SAL, COMM) | VAR_POP(COMM) | VAR_SAMP(SAL) |
+-------+-----------------------+---------------+-------------------+
| 1 | | | 1201250.0000 |
| 11 | | | |
| 12 | | | 1510833.333333334 |
| 2 | -35000.0000 | 10000.0000 | 831458.333333335 |
| 4 | | | |
| 5 | | | |
| 6 | | | |
| 9 | -175000.0000 | 490000.0000 | 31250.0000 |
+-------+-----------------------+---------------+-------------------+
(8 rows)
!ok
# [CALCITE-2224] WITHIN GROUP clause for aggregate functions
select deptno, collect(empno) within group (order by empno asc) as empnos
from "scott".emp
group by deptno;
+--------+--------------------------------------+
| DEPTNO | EMPNOS |
+--------+--------------------------------------+
| 10 | [7782, 7839, 7934] |
| 20 | [7369, 7566, 7788, 7876, 7902] |
| 30 | [7499, 7521, 7654, 7698, 7844, 7900] |
+--------+--------------------------------------+
(3 rows)
!ok
EnumerableAggregate(group=[{7}], EMPNOS=[COLLECT($0) WITHIN GROUP ([0])])
EnumerableTableScan(table=[[scott, EMP]])
!plan
select deptno, collect(empno) within group (order by empno desc) as empnos
from "scott".emp
group by deptno;
+--------+--------------------------------------+
| DEPTNO | EMPNOS |
+--------+--------------------------------------+
| 10 | [7934, 7839, 7782] |
| 20 | [7902, 7876, 7788, 7566, 7369] |
| 30 | [7900, 7844, 7698, 7654, 7521, 7499] |
+--------+--------------------------------------+
(3 rows)
!ok
EnumerableAggregate(group=[{7}], EMPNOS=[COLLECT($0) WITHIN GROUP ([0 DESC])])
EnumerableTableScan(table=[[scott, EMP]])
!plan
select
deptno,
collect(empno) as empnos_1,
collect(empno) within group (order by empno desc) as empnos_2
from "scott".emp
group by deptno;
+--------+--------------------------------------+--------------------------------------+
| DEPTNO | EMPNOS_1 | EMPNOS_2 |
+--------+--------------------------------------+--------------------------------------+
| 10 | [7782, 7839, 7934] | [7934, 7839, 7782] |
| 20 | [7369, 7566, 7788, 7876, 7902] | [7902, 7876, 7788, 7566, 7369] |
| 30 | [7499, 7521, 7654, 7698, 7844, 7900] | [7900, 7844, 7698, 7654, 7521, 7499] |
+--------+--------------------------------------+--------------------------------------+
(3 rows)
!ok
EnumerableAggregate(group=[{7}], EMPNOS_1=[COLLECT($0)], EMPNOS_2=[COLLECT($0) WITHIN GROUP ([0 DESC])])
EnumerableTableScan(table=[[scott, EMP]])
!plan
select deptno, collect(empno) within group (order by empno desc)
filter (where empno > 7500) as empnos
from "scott".emp
group by deptno;
+--------+--------------------------------+
| DEPTNO | EMPNOS |
+--------+--------------------------------+
| 10 | [7934, 7839, 7782] |
| 20 | [7902, 7876, 7788, 7566] |
| 30 | [7900, 7844, 7698, 7654, 7521] |
+--------+--------------------------------+
(3 rows)
!ok
EnumerableAggregate(group=[{0}], EMPNOS=[COLLECT($1) WITHIN GROUP ([1 DESC]) FILTER $2])
EnumerableCalc(expr#0..7=[{inputs}], expr#8=[7500], expr#9=[>($t0, $t8)], DEPTNO=[$t7], EMPNO=[$t0], $f2=[$t9])
EnumerableTableScan(table=[[scott, EMP]])
!plan
select deptno, collect(empno) within group (order by empno desc) as empnos1,
collect(empno) within group (order by empno asc) as empnos2
from "scott".emp
group by deptno;
+--------+--------------------------------------+--------------------------------------+
| DEPTNO | EMPNOS1 | EMPNOS2 |
+--------+--------------------------------------+--------------------------------------+
| 10 | [7934, 7839, 7782] | [7782, 7839, 7934] |
| 20 | [7902, 7876, 7788, 7566, 7369] | [7369, 7566, 7788, 7876, 7902] |
| 30 | [7900, 7844, 7698, 7654, 7521, 7499] | [7499, 7521, 7654, 7698, 7844, 7900] |
+--------+--------------------------------------+--------------------------------------+
(3 rows)
!ok
EnumerableAggregate(group=[{7}], EMPNOS1=[COLLECT($0) WITHIN GROUP ([0 DESC])], EMPNOS2=[COLLECT($0) WITHIN GROUP ([0])])
EnumerableTableScan(table=[[scott, EMP]])
!plan
# Aggregate WITHIN GROUP with JOIN
select dept.deptno,
collect(sal) within group (order by sal desc) as s,
collect(sal) within group (order by 1)as s1,
collect(sal) within group (order by sal) filter (where sal > 2000) as s2
from "scott".emp
join "scott".dept using (deptno)
group by dept.deptno;
+--------+-------------------------------------------------------+-------------------------------------------------------+-----------------------------+
| DEPTNO | S | S1 | S2 |
+--------+-------------------------------------------------------+-------------------------------------------------------+-----------------------------+
| 10 | [5000.00, 2450.00, 1300.00] | [2450.00, 5000.00, 1300.00] | [2450.00, 5000.00] |
| 20 | [3000.00, 3000.00, 2975.00, 1100.00, 800.00] | [800.00, 2975.00, 3000.00, 1100.00, 3000.00] | [2975.00, 3000.00, 3000.00] |
| 30 | [2850.00, 1600.00, 1500.00, 1250.00, 1250.00, 950.00] | [1600.00, 1250.00, 1250.00, 2850.00, 1500.00, 950.00] | [2850.00] |
+--------+-------------------------------------------------------+-------------------------------------------------------+-----------------------------+
(3 rows)
!ok
EnumerableAggregate(group=[{0}], S=[COLLECT($1) WITHIN GROUP ([1 DESC])], S1=[COLLECT($1) WITHIN GROUP ([2])], S2=[COLLECT($1) WITHIN GROUP ([1]) FILTER $3])
EnumerableCalc(expr#0..3=[{inputs}], expr#4=[1], expr#5=[2000], expr#6=[>($t2, $t5)], expr#7=[IS TRUE($t6)], DEPTNO=[$t0], SAL=[$t2], $f2=[$t4], $f3=[$t7])
EnumerableHashJoin(condition=[=($0, $3)], joinType=[inner])
EnumerableCalc(expr#0..2=[{inputs}], DEPTNO=[$t0])
EnumerableTableScan(table=[[scott, DEPT]])
EnumerableCalc(expr#0..7=[{inputs}], EMPNO=[$t0], SAL=[$t5], DEPTNO=[$t7])
EnumerableTableScan(table=[[scott, EMP]])
!plan
select deptno, collect(empno + 1) within group (order by 1) as empnos
from "scott".emp
group by deptno;
+--------+--------------------------------------+
| DEPTNO | EMPNOS |
+--------+--------------------------------------+
| 10 | [7783, 7840, 7935] |
| 20 | [7370, 7567, 7789, 7877, 7903] |
| 30 | [7500, 7522, 7655, 7699, 7845, 7901] |
+--------+--------------------------------------+
(3 rows)
!ok
EnumerableAggregate(group=[{0}], EMPNOS=[COLLECT($1) WITHIN GROUP ([2])])
EnumerableCalc(expr#0..7=[{inputs}], expr#8=[1], expr#9=[+($t0, $t8)], DEPTNO=[$t7], $f1=[$t9], $f2=[$t8])
EnumerableTableScan(table=[[scott, EMP]])
!plan
# BIT_AND, BIT_OR, BIT_XOR aggregate functions
select bit_and(deptno), bit_or(deptno), bit_xor(deptno) from "scott".emp;
+--------+--------+--------+
| EXPR$0 | EXPR$1 | EXPR$2 |
+--------+--------+--------+
| 0 | 30 | 30 |
+--------+--------+--------+
(1 row)
!ok
select deptno, bit_and(empno), bit_or(empno), bit_xor(empno) from "scott".emp group by deptno;
+--------+--------+--------+--------+
| DEPTNO | EXPR$1 | EXPR$2 | EXPR$3 |
+--------+--------+--------+--------+
| 10 | 7686 | 7935 | 7687 |
| 20 | 7168 | 8191 | 7985 |
| 30 | 7168 | 8191 | 934 |
+--------+--------+--------+--------+
(3 rows)
!ok
# Based on [DRUID-7593] Exact distinct-COUNT with complex expression (CASE, IN) throws
# NullPointerException
WITH wikipedia AS (
SELECT empno AS delta,
CASE WHEN deptno = 10 THEN 'true' ELSE 'false' END AS isRobot,
ename AS "user"
FROM "scott".emp)
SELECT COUNT(DISTINCT
CASE WHEN (((CASE WHEN wikipedia.delta IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10)
THEN REPLACE('Yes', 'Yes', 'Yes')
ELSE REPLACE('No', 'No', 'No') END) = 'No'))
AND (wikipedia.isRobot = 'true')
THEN (wikipedia."user")
ELSE NULL END)
- (MAX(CASE WHEN (((CASE WHEN wikipedia.delta IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10)
THEN REPLACE('Yes', 'Yes', 'Yes')
ELSE REPLACE('No', 'No', 'No') END) = 'No'))
AND (wikipedia.isRobot = 'true')
THEN NULL
ELSE -9223372036854775807 END)
+ 9223372036854775807 + 1) AS "wikipedia.count_distinct_filters_that_dont_work"
FROM wikipedia
LIMIT 500;
+-------------------------------------------------+
| wikipedia.count_distinct_filters_that_dont_work |
+-------------------------------------------------+
| 2 |
+-------------------------------------------------+
(1 row)
!ok
# [CALCITE-2266] JSON_OBJECTAGG, JSON_ARRAYAGG
!use post
select gender, json_objectagg(ename: deptno absent on null) from emp group by gender;
+--------+-------------------------------------------------------+
| GENDER | EXPR$1 |
+--------+-------------------------------------------------------+
| F | {"Eve":50,"Grace":60,"Susan":30,"Alice":30,"Jane":10} |
| M | {"Adam":50,"Bob":10,"Eric":20} |
+--------+-------------------------------------------------------+
(2 rows)
!ok
select gender, json_arrayagg(json_object('ename': ename, 'deptno': deptno) format json) from emp group by gender;
+--------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| GENDER | EXPR$1 |
+--------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| F | [{"ename":"Jane","deptno":10},{"ename":"Susan","deptno":30},{"ename":"Alice","deptno":30},{"ename":"Eve","deptno":50},{"ename":"Grace","deptno":60},{"ename":"Wilma","deptno":null}] |
| M | [{"ename":"Bob","deptno":10},{"ename":"Eric","deptno":20},{"ename":"Adam","deptno":50}] |
+--------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
(2 rows)
!ok
select gender, json_arrayagg(json_object('ename': ename, 'deptno': deptno)) from emp group by gender;
+--------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| GENDER | EXPR$1 |
+--------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| F | [{"ename":"Jane","deptno":10},{"ename":"Susan","deptno":30},{"ename":"Alice","deptno":30},{"ename":"Eve","deptno":50},{"ename":"Grace","deptno":60},{"ename":"Wilma","deptno":null}] |
| M | [{"ename":"Bob","deptno":10},{"ename":"Eric","deptno":20},{"ename":"Adam","deptno":50}] |
+--------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
(2 rows)
!ok
select json_object('deptno': deptno, 'employees': json_arrayagg(json_object('ename': ename, 'gender': gender) format json) format json) from emp group by deptno;
+-------------------------------------------------------------------------------------------+
| EXPR$0 |
+-------------------------------------------------------------------------------------------+
| {"employees":[{"ename":"Adam","gender":"M"},{"ename":"Eve","gender":"F"}],"deptno":50} |
| {"employees":[{"ename":"Eric","gender":"M"}],"deptno":20} |
| {"employees":[{"ename":"Grace","gender":"F"}],"deptno":60} |
| {"employees":[{"ename":"Jane","gender":"F"},{"ename":"Bob","gender":"M"}],"deptno":10} |
| {"employees":[{"ename":"Susan","gender":"F"},{"ename":"Alice","gender":"F"}],"deptno":30} |
| {"employees":[{"ename":"Wilma","gender":"F"}],"deptno":null} |
+-------------------------------------------------------------------------------------------+
(6 rows)
!ok
select json_object('deptno': deptno, 'employees': json_arrayagg(json_object('ename': ename, 'gender': gender))) from emp group by deptno;
+-------------------------------------------------------------------------------------------+
| EXPR$0 |
+-------------------------------------------------------------------------------------------+
| {"employees":[{"ename":"Adam","gender":"M"},{"ename":"Eve","gender":"F"}],"deptno":50} |
| {"employees":[{"ename":"Eric","gender":"M"}],"deptno":20} |
| {"employees":[{"ename":"Grace","gender":"F"}],"deptno":60} |
| {"employees":[{"ename":"Jane","gender":"F"},{"ename":"Bob","gender":"M"}],"deptno":10} |
| {"employees":[{"ename":"Susan","gender":"F"},{"ename":"Alice","gender":"F"}],"deptno":30} |
| {"employees":[{"ename":"Wilma","gender":"F"}],"deptno":null} |
+-------------------------------------------------------------------------------------------+
(6 rows)
!ok
# [CALCITE-2786] Add order by clause support for JSON_ARRAYAGG
select gender,
json_arrayagg(deptno order by deptno),
json_arrayagg(deptno order by deptno desc)
from emp group by gender;
+--------+------------------+------------------+
| GENDER | EXPR$1 | EXPR$2 |
+--------+------------------+------------------+
| F | [10,30,30,50,60] | [60,50,30,30,10] |
| M | [10,20,50] | [50,20,10] |
+--------+------------------+------------------+
(2 rows)
!ok
EnumerableAggregate(group=[{1}], EXPR$1=[JSON_ARRAYAGG_ABSENT_ON_NULL($0) WITHIN GROUP ([0])], EXPR$2=[JSON_ARRAYAGG_ABSENT_ON_NULL($0) WITHIN GROUP ([0 DESC])])
EnumerableValues(tuples=[[{ 10, 'F' }, { 10, 'M' }, { 20, 'M' }, { 30, 'F' }, { 30, 'F' }, { 50, 'M' }, { 50, 'F' }, { 60, 'F' }, { null, 'F' }]])
!plan
# [CALCITE-2787] Json aggregate calls with different null clause get incorrectly merged
# during converting from SQL to relational algebra
select gender,
json_arrayagg(deptno),
json_arrayagg(deptno null on null)
from emp group by gender;
+--------+------------------+-----------------------+
| GENDER | EXPR$1 | EXPR$2 |
+--------+------------------+-----------------------+
| F | [10,30,30,50,60] | [10,30,30,50,60,null] |
| M | [10,20,50] | [10,20,50] |
+--------+------------------+-----------------------+
(2 rows)
!ok
EnumerableAggregate(group=[{1}], EXPR$1=[JSON_ARRAYAGG_ABSENT_ON_NULL($0)], EXPR$2=[JSON_ARRAYAGG_NULL_ON_NULL($0)])
EnumerableValues(tuples=[[{ 10, 'F' }, { 10, 'M' }, { 20, 'M' }, { 30, 'F' }, { 30, 'F' }, { 50, 'M' }, { 50, 'F' }, { 60, 'F' }, { null, 'F' }]])
!plan
select gender,
json_objectagg(ename: deptno),
json_objectagg(ename: deptno absent on null)
from emp group by gender;
+--------+--------------------------------------------------------------------+-------------------------------------------------------+
| GENDER | EXPR$1 | EXPR$2 |
+--------+--------------------------------------------------------------------+-------------------------------------------------------+
| F | {"Eve":50,"Grace":60,"Wilma":null,"Susan":30,"Alice":30,"Jane":10} | {"Eve":50,"Grace":60,"Susan":30,"Alice":30,"Jane":10} |
| M | {"Adam":50,"Bob":10,"Eric":20} | {"Adam":50,"Bob":10,"Eric":20} |
+--------+--------------------------------------------------------------------+-------------------------------------------------------+
(2 rows)
!ok
EnumerableAggregate(group=[{2}], EXPR$1=[JSON_OBJECTAGG_NULL_ON_NULL($0, $1)], EXPR$2=[JSON_OBJECTAGG_ABSENT_ON_NULL($0, $1)])
EnumerableValues(tuples=[[{ 'Jane', 10, 'F' }, { 'Bob', 10, 'M' }, { 'Eric', 20, 'M' }, { 'Susan', 30, 'F' }, { 'Alice', 30, 'F' }, { 'Adam', 50, 'M' }, { 'Eve', 50, 'F' }, { 'Grace', 60, 'F' }, { 'Wilma', null, 'F' }]])
!plan
select listagg(ename) as combined_name from emp;
+------------------------------------------------+
| COMBINED_NAME |
+------------------------------------------------+
| Jane,Bob,Eric,Susan,Alice,Adam,Eve,Grace,Wilma |
+------------------------------------------------+
(1 row)
!ok
select listagg(ename) within group(order by gender, ename) as combined_name from emp;
+------------------------------------------------+
| COMBINED_NAME |
+------------------------------------------------+
| Alice,Eve,Grace,Jane,Susan,Wilma,Adam,Bob,Eric |
+------------------------------------------------+
(1 row)
!ok
EnumerableAggregate(group=[{}], COMBINED_NAME=[LISTAGG($0) WITHIN GROUP ([2, 0])])
EnumerableValues(tuples=[[{ 'Jane', 10, 'F' }, { 'Bob', 10, 'M' }, { 'Eric', 20, 'M' }, { 'Susan', 30, 'F' }, { 'Alice', 30, 'F' }, { 'Adam', 50, 'M' }, { 'Eve', 50, 'F' }, { 'Grace', 60, 'F' }, { 'Wilma', null, 'F' }]])
!plan
select
listagg(ename) within group(order by deptno, ename) as default_listagg_sep,
listagg(ename, '; ') within group(order by deptno, ename desc) as custom_listagg_sep
from emp group by gender;
+----------------------------------+---------------------------------------+
| DEFAULT_LISTAGG_SEP | CUSTOM_LISTAGG_SEP |
+----------------------------------+---------------------------------------+
| Bob,Eric,Adam | Bob; Eric; Adam |
| Jane,Alice,Susan,Eve,Grace,Wilma | Jane; Susan; Alice; Eve; Grace; Wilma |
+----------------------------------+---------------------------------------+
(2 rows)
!ok
!use mysqlfunc
# GROUP_CONCAT (MySQL) is very similar to LISTAGG.
# GROUP_CONCAT with DISTINCT, SEPARATOR
select
group_concat(distinct ename order by ename) as combined_name,
group_concat(ename order by ename separator ';') as separated_name
from emp;
+------------------------------------------------+------------------------------------------------+
| COMBINED_NAME | SEPARATED_NAME |
+------------------------------------------------+------------------------------------------------+
| Adam,Alice,Bob,Eric,Eve,Grace,Jane,Susan,Wilma | Adam;Alice;Bob;Eric;Eve;Grace;Jane;Susan;Wilma |
+------------------------------------------------+------------------------------------------------+
(1 row)
!ok
# GROUP_CONCAT with multiple columns
select
group_concat(deptno, ename order by ename) as combined_name
from emp;
+-----------------------------------------------+
| COMBINED_NAME |
+-----------------------------------------------+
| 50Alice30Bob10Eric20Eve50Grace60Jane10Susan30 |
+-----------------------------------------------+
(1 row)
!ok
# We currently do not support GROUP_CONCAT with composite columns and
# SEPARATOR because LIST_AGG does not support it.
!if (false) {
select
group_concat(deptno, ename order by ename separator ';') as separated_name
from emp;
!ok
!}
!use post-big-query
# STRING_AGG (BigQuery and PostgreSQL) is very similar to LISTAGG.
select
string_agg(ename order by deptno, ename) as default_string_agg_sep,
string_agg(ename, '; ' order by deptno, ename desc) as custom_string_agg_sep
from emp group by gender;
+----------------------------------+---------------------------------------+
| DEFAULT_STRING_AGG_SEP | CUSTOM_STRING_AGG_SEP |
+----------------------------------+---------------------------------------+
| Bob,Eric,Adam | Bob; Eric; Adam |
| Jane,Alice,Susan,Eve,Grace,Wilma | Jane; Susan; Alice; Eve; Grace; Wilma |
+----------------------------------+---------------------------------------+
(2 rows)
!ok
# COUNTIF(b) (BigQuery) is equivalent to COUNT(*) FILTER (WHERE b)
select deptno, countif(gender = 'F') as f
from emp
group by deptno;
+--------+---+
| DEPTNO | F |
+--------+---+
| 10 | 1 |
| 20 | 0 |
| 30 | 2 |
| 50 | 1 |
| 60 | 1 |
| | 1 |
+--------+---+
(6 rows)
!ok
select countif(gender = 'F') filter (where deptno = 30) as f
from emp;
+---+
| F |
+---+
| 2 |
+---+
(1 row)
!ok
select countif(a > 0) + countif(a > 1) + countif(c > 1) as c
from (select 1 as a, 2 as b, 3 as c);
+---+
| C |
+---+
| 2 |
+---+
(1 row)
!ok
# [CALCITE-3661] Add MODE aggregate function
# MODE without GROUP BY
select MODE(gender) as m
from emp;
+---+
| M |
+---+
| F |
+---+
(1 row)
!ok
# MODE with DISTINCT is pretty much useless (because every value occurs once),
# but we allow it. It returns the first value seen, in this case 'F'.
select MODE(distinct gender) as m
from emp;
+---+
| M |
+---+
| F |
+---+
(1 row)
!ok
# MODE function with WHERE.
select MODE(gender) as m
from emp
where deptno <= 20;
+---+
| M |
+---+
| M |
+---+
(1 row)
!ok
# MODE function with WHERE that removes all rows.
# Result is NULL even though MODE is applied to a not-NULL column.
select MODE(gender) as m
from emp
where deptno > 60;
+---+
| M |
+---+
| |
+---+
(1 row)
!ok
# MODE function with GROUP BY.
select deptno, MODE(gender) as m
from emp
where deptno > 10
group by deptno;
+--------+---+
| DEPTNO | M |
+--------+---+
| 20 | M |
| 30 | F |
| 50 | M |
| 60 | F |
+--------+---+
(4 rows)
!ok
# MODE function with GROUP BY; note that key is NULL but result is not NULL.
select deptno, MODE(gender) as m
from emp
where ename = 'Wilma'
group by deptno;
+--------+---+
| DEPTNO | M |
+--------+---+
| | F |
+--------+---+
(1 row)
!ok
# MODE function with GROUP BY; key is NULL and input value is NULL.
select deptno, MODE(deptno) as m
from emp
where ename = 'Wilma'
group by deptno;
+--------+---+
| DEPTNO | M |
+--------+---+
| | |
+--------+---+
(1 row)
!ok
# MODE function applied to NULL value.
# (Calcite requires CAST so that it can deduce type.)
select deptno, MODE(CAST(null AS INTEGER)) as m
from emp
group by deptno;
+--------+---+
| DEPTNO | M |
+--------+---+
| 10 | |
| 20 | |
| 30 | |
| 50 | |
| 60 | |
| | |
+--------+---+
(6 rows)
!ok
# MODE function with GROUPING SETS.
select deptno, ename, MODE(gender) as m
from emp
group by grouping sets (deptno, ename);
+--------+-------+---+
| DEPTNO | ENAME | M |
+--------+-------+---+
| 10 | | F |
| 20 | | M |
| 30 | | F |
| 50 | | M |
| 60 | | F |
| | Adam | M |
| | Alice | F |
| | Bob | M |
| | Eric | M |
| | Eve | F |
| | Grace | F |
| | Jane | F |
| | Susan | F |
| | Wilma | F |
| | | F |
+--------+-------+---+
(15 rows)
!ok
# [CALCITE-4665] Allow Aggregate.groupKey to be a strict superset of
# Aggregate.groupKeys
# Use a condition on grouping_id to filter out the superset grouping sets.
select ename, deptno, gender, grouping(ename) as g_e,
grouping(deptno) as g_d, grouping(gender) as g_g
from emp
where gender = 'M'
group by grouping sets (ename, deptno, (ename, deptno),
(ename, deptno, gender))
having grouping_id(ename, deptno, gender) <> 0
order by ename, deptno;
+-------+--------+--------+-----+-----+-----+
| ENAME | DEPTNO | GENDER | G_E | G_D | G_G |
+-------+--------+--------+-----+-----+-----+
| Adam | 50 | | 0 | 0 | 1 |
| Adam | | | 0 | 1 | 1 |
| Bob | 10 | | 0 | 0 | 1 |
| Bob | | | 0 | 1 | 1 |
| Eric | 20 | | 0 | 0 | 1 |
| Eric | | | 0 | 1 | 1 |
| | 10 | | 1 | 0 | 1 |
| | 20 | | 1 | 0 | 1 |
| | 50 | | 1 | 0 | 1 |
+-------+--------+--------+-----+-----+-----+
(9 rows)
!ok
# just a comparison about the above sql
select ename, deptno, grouping(ename) as g_e,
grouping(deptno) as g_d
from emp
where gender = 'M'
group by grouping sets (ename, deptno, (ename, deptno))
order by ename, deptno;
+-------+--------+-----+-----+
| ENAME | DEPTNO | G_E | G_D |
+-------+--------+-----+-----+
| Adam | 50 | 0 | 0 |
| Adam | | 0 | 1 |
| Bob | 10 | 0 | 0 |
| Bob | | 0 | 1 |
| Eric | 20 | 0 | 0 |
| Eric | | 0 | 1 |
| | 10 | 1 | 0 |
| | 20 | 1 | 0 |
| | 50 | 1 | 0 |
+-------+--------+-----+-----+
(9 rows)
!ok
# Test cases for [CALCITE-5209] Proper sub-query handling if it is used inside select list and group by
!use scott
select
case when deptno in (1, 2, 3, 4, 5) THEN 1 else 0 end
from emp
group by
case when deptno in (1, 2, 3, 4, 5) THEN 1 else 0 end;
+--------+
| EXPR$0 |
+--------+
| 0 |
+--------+
(1 row)
!ok
!set insubquerythreshold 5
select
case when deptno in (1, 2, 3, 4, 5) THEN 1 else 0 end
from emp
group by
case when deptno in (1, 2, 3, 4, 5) THEN 1 else 0 end;
+--------+
| EXPR$0 |
+--------+
| 0 |
+--------+
(1 row)
!ok
# Test case for [CALCITE-5388] tempList expression inside EnumerableWindow.getPartitionIterator should be unoptimized
with
CTE1(rownr1, val1) as ( select ROW_NUMBER() OVER(ORDER BY id ASC), id from (values (1), (2)) as Vals1(id) ),
CTE2(rownr2, val2) as ( select ROW_NUMBER() OVER(ORDER BY id ASC), id from (values (1), (2)) as Vals2(id) )
select
CTE1.rownr1, CTE1.val1, CTE2.rownr2, CTE2.val2
from
CTE1,CTE2
where
CTE1.val1 = CTE2.val2;
+--------+------+--------+------+
| ROWNR1 | VAL1 | ROWNR2 | VAL2 |
+--------+------+--------+------+
| 1 | 1 | 1 | 1 |
| 2 | 2 | 2 | 2 |
+--------+------+--------+------+
(2 rows)
!ok
# [CALCITE-5283] Add ARG_MIN, ARG_MAX aggregate function
# ARG_MIN, ARG_MAX without GROUP BY
select arg_min(ename, deptno) as mi, arg_max(ename, deptno) as ma
from emp;
+-------+-------+
| MI | MA |
+-------+-------+
| CLARK | ALLEN |
+-------+-------+
(1 row)
!ok
# ARG_MIN, ARG_MAX with DISTINCT
select arg_min(distinct ename, deptno) as mi, arg_max(distinct ename, deptno) as ma
from emp;
+-------+-------+
| MI | MA |
+-------+-------+
| CLARK | ALLEN |
+-------+-------+
(1 row)
!ok
# ARG_MIN, ARG_MAX function with WHERE.
select arg_min(ename, deptno) as mi, arg_max(ename, deptno) as ma
from emp
where deptno <= 20;
+-------+-------+
| MI | MA |
+-------+-------+
| CLARK | SMITH |
+-------+-------+
(1 row)
!ok
# ARG_MIN, ARG_MAX function with WHERE that removes all rows.
# Result is NULL even though ARG_MIN, ARG_MAX is applied to a not-NULL column.
select arg_min(ename, deptno) as mi, arg_max(ename, deptno) as ma
from emp
where deptno > 60;
+----+----+
| MI | MA |
+----+----+
| | |
+----+----+
(1 row)
!ok
# ARG_MIN, ARG_MAX function with GROUP BY. note that key is NULL but result is not NULL.
select deptno, arg_min(ename, ename) as mi, arg_max(ename, ename) as ma
from emp
group by deptno;
+--------+-------+--------+
| DEPTNO | MI | MA |
+--------+-------+--------+
| 10 | CLARK | MILLER |
| 20 | ADAMS | SMITH |
| 30 | ALLEN | WARD |
+--------+-------+--------+
(3 rows)
!ok
# ARG_MIN, ARG_MAX applied to an integer.
select arg_min(deptno, empno) as mi,
arg_max(deptno, empno) as ma,
arg_max(deptno, empno) filter (where job = 'MANAGER') as mamgr
from emp;
+----+----+-------+
| MI | MA | MAMGR |
+----+----+-------+
| 20 | 10 | 10 |
+----+----+-------+
(1 row)
!ok
# DISTINCT query with ORDER BY on aggregate when there is an implicit cast
select distinct sum(deptno + '1') as deptsum from dept order by 1;
+---------+
| DEPTSUM |
+---------+
| 104 |
+---------+
(1 row)
!ok
# End agg.iq