| # basic aggregation |
| select count(*), count(tinyint_col), min(tinyint_col), max(tinyint_col), sum(tinyint_col), |
| avg(tinyint_col) |
| from functional.alltypesagg |
| ---- PLAN |
| PLAN-ROOT SINK |
| | |
| 01:AGGREGATE [FINALIZE] |
| | output: count(*), count(tinyint_col), min(tinyint_col), max(tinyint_col), sum(tinyint_col), avg(tinyint_col) |
| | row-size=34B cardinality=1 |
| | |
| 00:SCAN HDFS [functional.alltypesagg] |
| HDFS partitions=11/11 files=11 size=814.73KB |
| row-size=1B cardinality=11.00K |
| ---- DISTRIBUTEDPLAN |
| PLAN-ROOT SINK |
| | |
| 03:AGGREGATE [FINALIZE] |
| | output: count:merge(*), count:merge(tinyint_col), min:merge(tinyint_col), max:merge(tinyint_col), sum:merge(tinyint_col), avg:merge(tinyint_col) |
| | row-size=34B cardinality=1 |
| | |
| 02:EXCHANGE [UNPARTITIONED] |
| | |
| 01:AGGREGATE |
| | output: count(*), count(tinyint_col), min(tinyint_col), max(tinyint_col), sum(tinyint_col), avg(tinyint_col) |
| | row-size=34B cardinality=3 |
| | |
| 00:SCAN HDFS [functional.alltypesagg] |
| HDFS partitions=11/11 files=11 size=814.73KB |
| row-size=1B cardinality=11.00K |
| ==== |
| # with grouping |
| select tinyint_col, bigint_col, count(*), min(tinyint_col), max(tinyint_col), sum(tinyint_col), |
| avg(tinyint_col) |
| from functional.alltypesagg |
| group by 2, 1 |
| ---- PLAN |
| PLAN-ROOT SINK |
| | |
| 01:AGGREGATE [FINALIZE] |
| | output: count(*), min(tinyint_col), max(tinyint_col), sum(tinyint_col), avg(tinyint_col) |
| | group by: bigint_col, tinyint_col |
| | row-size=35B cardinality=9.07K |
| | |
| 00:SCAN HDFS [functional.alltypesagg] |
| HDFS partitions=11/11 files=11 size=814.73KB |
| row-size=9B cardinality=11.00K |
| ---- DISTRIBUTEDPLAN |
| PLAN-ROOT SINK |
| | |
| 04:EXCHANGE [UNPARTITIONED] |
| | |
| 03:AGGREGATE [FINALIZE] |
| | output: count:merge(*), min:merge(tinyint_col), max:merge(tinyint_col), sum:merge(tinyint_col), avg:merge(tinyint_col) |
| | group by: bigint_col, tinyint_col |
| | row-size=35B cardinality=9.05K |
| | |
| 02:EXCHANGE [HASH(bigint_col,tinyint_col)] |
| | |
| 01:AGGREGATE [STREAMING] |
| | output: count(*), min(tinyint_col), max(tinyint_col), sum(tinyint_col), avg(tinyint_col) |
| | group by: bigint_col, tinyint_col |
| | row-size=35B cardinality=9.05K |
| | |
| 00:SCAN HDFS [functional.alltypesagg] |
| HDFS partitions=11/11 files=11 size=814.73KB |
| row-size=9B cardinality=11.00K |
| ==== |
| # avg substitution |
| select avg(id) |
| from functional.testtbl |
| having count(id) > 0 |
| order by avg(zip) limit 10 |
| ---- PLAN |
| PLAN-ROOT SINK |
| | |
| 02:TOP-N [LIMIT=10] |
| | order by: avg(zip) ASC |
| | row-size=16B cardinality=1 |
| | |
| 01:AGGREGATE [FINALIZE] |
| | output: avg(id), count(id), avg(zip) |
| | having: count(id) > 0 |
| | row-size=24B cardinality=1 |
| | |
| 00:SCAN HDFS [functional.testtbl] |
| HDFS partitions=1/1 files=0 size=0B |
| row-size=12B cardinality=0 |
| ---- DISTRIBUTEDPLAN |
| PLAN-ROOT SINK |
| | |
| 02:TOP-N [LIMIT=10] |
| | order by: avg(zip) ASC |
| | row-size=16B cardinality=1 |
| | |
| 04:AGGREGATE [FINALIZE] |
| | output: avg:merge(id), count:merge(id), avg:merge(zip) |
| | having: count(id) > 0 |
| | row-size=24B cardinality=1 |
| | |
| 03:EXCHANGE [UNPARTITIONED] |
| | |
| 01:AGGREGATE |
| | output: avg(id), count(id), avg(zip) |
| | row-size=24B cardinality=0 |
| | |
| 00:SCAN HDFS [functional.testtbl] |
| HDFS partitions=1/1 files=0 size=0B |
| row-size=12B cardinality=0 |
| ==== |
| # Test correct removal of redundant group-by expressions (IMPALA-817) |
| select int_col + int_col, int_col * int_col |
| from functional.alltypesagg |
| group by int_col + int_col, int_col * int_col, int_col + int_col |
| having (int_col * int_col) < 0 limit 10 |
| ---- PLAN |
| PLAN-ROOT SINK |
| | |
| 01:AGGREGATE [FINALIZE] |
| | group by: int_col + int_col, int_col * int_col |
| | having: int_col * int_col < 0 |
| | limit: 10 |
| | row-size=16B cardinality=10 |
| | |
| 00:SCAN HDFS [functional.alltypesagg] |
| HDFS partitions=11/11 files=11 size=814.73KB |
| row-size=4B cardinality=11.00K |
| ---- DISTRIBUTEDPLAN |
| PLAN-ROOT SINK |
| | |
| 04:EXCHANGE [UNPARTITIONED] |
| | limit: 10 |
| | |
| 03:AGGREGATE [FINALIZE] |
| | group by: int_col + int_col, int_col * int_col |
| | having: int_col * int_col < 0 |
| | limit: 10 |
| | row-size=16B cardinality=10 |
| | |
| 02:EXCHANGE [HASH(int_col + int_col,int_col * int_col)] |
| | |
| 01:AGGREGATE [STREAMING] |
| | group by: int_col + int_col, int_col * int_col |
| | row-size=16B cardinality=11.00K |
| | |
| 00:SCAN HDFS [functional.alltypesagg] |
| HDFS partitions=11/11 files=11 size=814.73KB |
| row-size=4B cardinality=11.00K |
| ==== |
| # Tests that a having predicate triggers slot materialization (IMPALA-846). |
| select count(*) from |
| functional.alltypes t1 inner join functional.alltypestiny t2 |
| on t1.smallint_col = t2.smallint_col |
| group by t1.tinyint_col, t2.smallint_col |
| having count(t2.int_col) = count(t1.bigint_col) |
| ---- PLAN |
| PLAN-ROOT SINK |
| | |
| 03:AGGREGATE [FINALIZE] |
| | output: count(*), count(t2.int_col), count(t1.bigint_col) |
| | group by: t1.tinyint_col, t2.smallint_col |
| | having: count(t2.int_col) = count(t1.bigint_col) |
| | row-size=27B cardinality=2 |
| | |
| 02:HASH JOIN [INNER JOIN] |
| | hash predicates: t1.smallint_col = t2.smallint_col |
| | runtime filters: RF000 <- t2.smallint_col |
| | row-size=17B cardinality=5.84K |
| | |
| |--01:SCAN HDFS [functional.alltypestiny t2] |
| | HDFS partitions=4/4 files=4 size=460B |
| | row-size=6B cardinality=8 |
| | |
| 00:SCAN HDFS [functional.alltypes t1] |
| HDFS partitions=24/24 files=24 size=478.45KB |
| runtime filters: RF000 -> t1.smallint_col |
| row-size=11B cardinality=7.30K |
| ==== |
| # Tests proper slot materialization of agg-tuple slots for avg (IMP-1271). |
| # 't.x > 10' is picked up as an unassigned conjunct, and not as a binding |
| # predicate because avg gets rewritten into an expr against two slots |
| # (and getBoundPredicates() cannot handle multi-slot predicates). |
| select 1 from |
| (select int_col, avg(bigint_col) x from functional.alltypes |
| group by int_col) t |
| where t.x > 10 |
| ---- PLAN |
| PLAN-ROOT SINK |
| | |
| 01:AGGREGATE [FINALIZE] |
| | output: avg(bigint_col) |
| | group by: int_col |
| | having: avg(bigint_col) > 10 |
| | row-size=12B cardinality=1 |
| | |
| 00:SCAN HDFS [functional.alltypes] |
| HDFS partitions=24/24 files=24 size=478.45KB |
| row-size=12B cardinality=7.30K |
| ==== |
| # test distributed aggregation over unions (IMPALA-831) |
| # non-distinct agg without grouping over a union |
| select count(*) from |
| (select * from functional.alltypes |
| union all |
| select * from functional.alltypessmall) t |
| limit 10 |
| ---- PLAN |
| PLAN-ROOT SINK |
| | |
| 03:AGGREGATE [FINALIZE] |
| | output: count(*) |
| | limit: 10 |
| | row-size=8B cardinality=1 |
| | |
| 00:UNION |
| | pass-through-operands: all |
| | row-size=0B cardinality=7.40K |
| | |
| |--02:SCAN HDFS [functional.alltypessmall] |
| | HDFS partitions=4/4 files=4 size=6.32KB |
| | row-size=0B cardinality=100 |
| | |
| 01:SCAN HDFS [functional.alltypes] |
| HDFS partitions=24/24 files=24 size=478.45KB |
| row-size=0B cardinality=7.30K |
| ---- DISTRIBUTEDPLAN |
| PLAN-ROOT SINK |
| | |
| 05:AGGREGATE [FINALIZE] |
| | output: count:merge(*) |
| | limit: 10 |
| | row-size=8B cardinality=1 |
| | |
| 04:EXCHANGE [UNPARTITIONED] |
| | |
| 03:AGGREGATE |
| | output: count(*) |
| | row-size=8B cardinality=3 |
| | |
| 00:UNION |
| | pass-through-operands: all |
| | row-size=0B cardinality=7.40K |
| | |
| |--02:SCAN HDFS [functional.alltypessmall] |
| | HDFS partitions=4/4 files=4 size=6.32KB |
| | row-size=0B cardinality=100 |
| | |
| 01:SCAN HDFS [functional.alltypes] |
| HDFS partitions=24/24 files=24 size=478.45KB |
| row-size=0B cardinality=7.30K |
| ==== |
| # non-distinct agg with grouping over a union |
| select count(*) from |
| (select * from functional.alltypes |
| union all |
| select * from functional.alltypessmall) t |
| group by t.bigint_col |
| limit 10 |
| ---- PLAN |
| PLAN-ROOT SINK |
| | |
| 03:AGGREGATE [FINALIZE] |
| | output: count(*) |
| | group by: bigint_col |
| | limit: 10 |
| | row-size=16B cardinality=10 |
| | |
| 00:UNION |
| | pass-through-operands: all |
| | row-size=8B cardinality=7.40K |
| | |
| |--02:SCAN HDFS [functional.alltypessmall] |
| | HDFS partitions=4/4 files=4 size=6.32KB |
| | row-size=8B cardinality=100 |
| | |
| 01:SCAN HDFS [functional.alltypes] |
| HDFS partitions=24/24 files=24 size=478.45KB |
| row-size=8B cardinality=7.30K |
| ---- DISTRIBUTEDPLAN |
| PLAN-ROOT SINK |
| | |
| 06:EXCHANGE [UNPARTITIONED] |
| | limit: 10 |
| | |
| 05:AGGREGATE [FINALIZE] |
| | output: count:merge(*) |
| | group by: t.bigint_col |
| | limit: 10 |
| | row-size=16B cardinality=10 |
| | |
| 04:EXCHANGE [HASH(t.bigint_col)] |
| | |
| 03:AGGREGATE [STREAMING] |
| | output: count(*) |
| | group by: bigint_col |
| | row-size=16B cardinality=60 |
| | |
| 00:UNION |
| | pass-through-operands: all |
| | row-size=8B cardinality=7.40K |
| | |
| |--02:SCAN HDFS [functional.alltypessmall] |
| | HDFS partitions=4/4 files=4 size=6.32KB |
| | row-size=8B cardinality=100 |
| | |
| 01:SCAN HDFS [functional.alltypes] |
| HDFS partitions=24/24 files=24 size=478.45KB |
| row-size=8B cardinality=7.30K |
| ==== |
| # distinct agg without grouping over a union |
| select count(distinct int_col) |
| from |
| (select * from functional.alltypes |
| union all |
| select * from functional.alltypessmall) t |
| limit 10 |
| ---- PLAN |
| PLAN-ROOT SINK |
| | |
| 04:AGGREGATE [FINALIZE] |
| | output: count(int_col) |
| | limit: 10 |
| | row-size=8B cardinality=1 |
| | |
| 03:AGGREGATE |
| | group by: int_col |
| | row-size=4B cardinality=20 |
| | |
| 00:UNION |
| | pass-through-operands: all |
| | row-size=4B cardinality=7.40K |
| | |
| |--02:SCAN HDFS [functional.alltypessmall] |
| | HDFS partitions=4/4 files=4 size=6.32KB |
| | row-size=4B cardinality=100 |
| | |
| 01:SCAN HDFS [functional.alltypes] |
| HDFS partitions=24/24 files=24 size=478.45KB |
| row-size=4B cardinality=7.30K |
| ---- DISTRIBUTEDPLAN |
| PLAN-ROOT SINK |
| | |
| 08:AGGREGATE [FINALIZE] |
| | output: count:merge(int_col) |
| | limit: 10 |
| | row-size=8B cardinality=1 |
| | |
| 07:EXCHANGE [UNPARTITIONED] |
| | |
| 04:AGGREGATE |
| | output: count(int_col) |
| | row-size=8B cardinality=1 |
| | |
| 06:AGGREGATE |
| | group by: int_col |
| | row-size=4B cardinality=20 |
| | |
| 05:EXCHANGE [HASH(int_col)] |
| | |
| 03:AGGREGATE [STREAMING] |
| | group by: int_col |
| | row-size=4B cardinality=60 |
| | |
| 00:UNION |
| | pass-through-operands: all |
| | row-size=4B cardinality=7.40K |
| | |
| |--02:SCAN HDFS [functional.alltypessmall] |
| | HDFS partitions=4/4 files=4 size=6.32KB |
| | row-size=4B cardinality=100 |
| | |
| 01:SCAN HDFS [functional.alltypes] |
| HDFS partitions=24/24 files=24 size=478.45KB |
| row-size=4B cardinality=7.30K |
| ==== |
| # distinct agg with grouping over a union |
| select count(distinct int_col) |
| from |
| (select * from functional.alltypes |
| union all |
| select * from functional.alltypessmall) t |
| group by t.bigint_col |
| limit 10 |
| ---- PLAN |
| PLAN-ROOT SINK |
| | |
| 04:AGGREGATE [FINALIZE] |
| | output: count(int_col) |
| | group by: t.bigint_col |
| | limit: 10 |
| | row-size=16B cardinality=10 |
| | |
| 03:AGGREGATE |
| | group by: bigint_col, int_col |
| | row-size=12B cardinality=400 |
| | |
| 00:UNION |
| | pass-through-operands: all |
| | row-size=12B cardinality=7.40K |
| | |
| |--02:SCAN HDFS [functional.alltypessmall] |
| | HDFS partitions=4/4 files=4 size=6.32KB |
| | row-size=12B cardinality=100 |
| | |
| 01:SCAN HDFS [functional.alltypes] |
| HDFS partitions=24/24 files=24 size=478.45KB |
| row-size=12B cardinality=7.30K |
| ---- DISTRIBUTEDPLAN |
| PLAN-ROOT SINK |
| | |
| 09:EXCHANGE [UNPARTITIONED] |
| | limit: 10 |
| | |
| 08:AGGREGATE [FINALIZE] |
| | output: count:merge(int_col) |
| | group by: t.bigint_col |
| | limit: 10 |
| | row-size=16B cardinality=10 |
| | |
| 07:EXCHANGE [HASH(t.bigint_col)] |
| | |
| 04:AGGREGATE [STREAMING] |
| | output: count(int_col) |
| | group by: t.bigint_col |
| | row-size=16B cardinality=60 |
| | |
| 06:AGGREGATE |
| | group by: t.bigint_col, int_col |
| | row-size=12B cardinality=400 |
| | |
| 05:EXCHANGE [HASH(t.bigint_col,int_col)] |
| | |
| 03:AGGREGATE [STREAMING] |
| | group by: bigint_col, int_col |
| | row-size=12B cardinality=1.20K |
| | |
| 00:UNION |
| | pass-through-operands: all |
| | row-size=12B cardinality=7.40K |
| | |
| |--02:SCAN HDFS [functional.alltypessmall] |
| | HDFS partitions=4/4 files=4 size=6.32KB |
| | row-size=12B cardinality=100 |
| | |
| 01:SCAN HDFS [functional.alltypes] |
| HDFS partitions=24/24 files=24 size=478.45KB |
| row-size=12B cardinality=7.30K |
| ==== |
| # mixed distinct and non-distinct agg without grouping over a union |
| select count(smallint_col), count(distinct int_col) |
| from |
| (select * from functional.alltypes |
| union all |
| select * from functional.alltypessmall) t |
| limit 10 |
| ---- PLAN |
| PLAN-ROOT SINK |
| | |
| 04:AGGREGATE [FINALIZE] |
| | output: count(int_col), count:merge(smallint_col) |
| | limit: 10 |
| | row-size=16B cardinality=1 |
| | |
| 03:AGGREGATE |
| | output: count(smallint_col) |
| | group by: int_col |
| | row-size=12B cardinality=20 |
| | |
| 00:UNION |
| | pass-through-operands: all |
| | row-size=6B cardinality=7.40K |
| | |
| |--02:SCAN HDFS [functional.alltypessmall] |
| | HDFS partitions=4/4 files=4 size=6.32KB |
| | row-size=6B cardinality=100 |
| | |
| 01:SCAN HDFS [functional.alltypes] |
| HDFS partitions=24/24 files=24 size=478.45KB |
| row-size=6B cardinality=7.30K |
| ---- DISTRIBUTEDPLAN |
| PLAN-ROOT SINK |
| | |
| 08:AGGREGATE [FINALIZE] |
| | output: count:merge(int_col), count:merge(smallint_col) |
| | limit: 10 |
| | row-size=16B cardinality=1 |
| | |
| 07:EXCHANGE [UNPARTITIONED] |
| | |
| 04:AGGREGATE |
| | output: count(int_col), count:merge(smallint_col) |
| | row-size=16B cardinality=1 |
| | |
| 06:AGGREGATE |
| | output: count:merge(smallint_col) |
| | group by: int_col |
| | row-size=12B cardinality=20 |
| | |
| 05:EXCHANGE [HASH(int_col)] |
| | |
| 03:AGGREGATE [STREAMING] |
| | output: count(smallint_col) |
| | group by: int_col |
| | row-size=12B cardinality=60 |
| | |
| 00:UNION |
| | pass-through-operands: all |
| | row-size=6B cardinality=7.40K |
| | |
| |--02:SCAN HDFS [functional.alltypessmall] |
| | HDFS partitions=4/4 files=4 size=6.32KB |
| | row-size=6B cardinality=100 |
| | |
| 01:SCAN HDFS [functional.alltypes] |
| HDFS partitions=24/24 files=24 size=478.45KB |
| row-size=6B cardinality=7.30K |
| ==== |
| # mixed distinct and non-distinct agg with grouping over a union |
| select count(smallint_col), count(distinct int_col) |
| from |
| (select * from functional.alltypes |
| union all |
| select * from functional.alltypessmall) t |
| group by t.bigint_col |
| limit 10 |
| ---- PLAN |
| PLAN-ROOT SINK |
| | |
| 04:AGGREGATE [FINALIZE] |
| | output: count(int_col), count:merge(smallint_col) |
| | group by: t.bigint_col |
| | limit: 10 |
| | row-size=24B cardinality=10 |
| | |
| 03:AGGREGATE |
| | output: count(smallint_col) |
| | group by: bigint_col, int_col |
| | row-size=20B cardinality=400 |
| | |
| 00:UNION |
| | pass-through-operands: all |
| | row-size=14B cardinality=7.40K |
| | |
| |--02:SCAN HDFS [functional.alltypessmall] |
| | HDFS partitions=4/4 files=4 size=6.32KB |
| | row-size=14B cardinality=100 |
| | |
| 01:SCAN HDFS [functional.alltypes] |
| HDFS partitions=24/24 files=24 size=478.45KB |
| row-size=14B cardinality=7.30K |
| ---- DISTRIBUTEDPLAN |
| PLAN-ROOT SINK |
| | |
| 09:EXCHANGE [UNPARTITIONED] |
| | limit: 10 |
| | |
| 08:AGGREGATE [FINALIZE] |
| | output: count:merge(int_col), count:merge(smallint_col) |
| | group by: t.bigint_col |
| | limit: 10 |
| | row-size=24B cardinality=10 |
| | |
| 07:EXCHANGE [HASH(t.bigint_col)] |
| | |
| 04:AGGREGATE [STREAMING] |
| | output: count(int_col), count:merge(smallint_col) |
| | group by: t.bigint_col |
| | row-size=24B cardinality=60 |
| | |
| 06:AGGREGATE |
| | output: count:merge(smallint_col) |
| | group by: t.bigint_col, int_col |
| | row-size=20B cardinality=400 |
| | |
| 05:EXCHANGE [HASH(t.bigint_col,int_col)] |
| | |
| 03:AGGREGATE [STREAMING] |
| | output: count(smallint_col) |
| | group by: bigint_col, int_col |
| | row-size=20B cardinality=1.20K |
| | |
| 00:UNION |
| | pass-through-operands: all |
| | row-size=14B cardinality=7.40K |
| | |
| |--02:SCAN HDFS [functional.alltypessmall] |
| | HDFS partitions=4/4 files=4 size=6.32KB |
| | row-size=14B cardinality=100 |
| | |
| 01:SCAN HDFS [functional.alltypes] |
| HDFS partitions=24/24 files=24 size=478.45KB |
| row-size=14B cardinality=7.30K |
| ==== |
| # mixed distinct and non-distinct agg with grouping over a union distinct |
| select count(smallint_col), count(distinct int_col) |
| from |
| (select * from functional.alltypes |
| union distinct |
| select * from functional.alltypessmall) t |
| group by t.bigint_col |
| limit 10 |
| ---- PLAN |
| PLAN-ROOT SINK |
| | |
| 05:AGGREGATE [FINALIZE] |
| | output: count(int_col), count:merge(smallint_col) |
| | group by: t.bigint_col |
| | limit: 10 |
| | row-size=24B cardinality=10 |
| | |
| 04:AGGREGATE |
| | output: count(smallint_col) |
| | group by: bigint_col, int_col |
| | row-size=20B cardinality=400 |
| | |
| 03:AGGREGATE [FINALIZE] |
| | group by: id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col, timestamp_col, year, month |
| | row-size=89B cardinality=7.40K |
| | |
| 00:UNION |
| | pass-through-operands: all |
| | row-size=89B cardinality=7.40K |
| | |
| |--02:SCAN HDFS [functional.alltypessmall] |
| | HDFS partitions=4/4 files=4 size=6.32KB |
| | row-size=89B cardinality=100 |
| | |
| 01:SCAN HDFS [functional.alltypes] |
| HDFS partitions=24/24 files=24 size=478.45KB |
| row-size=89B cardinality=7.30K |
| ---- DISTRIBUTEDPLAN |
| PLAN-ROOT SINK |
| | |
| 12:EXCHANGE [UNPARTITIONED] |
| | limit: 10 |
| | |
| 11:AGGREGATE [FINALIZE] |
| | output: count:merge(int_col), count:merge(smallint_col) |
| | group by: t.bigint_col |
| | limit: 10 |
| | row-size=24B cardinality=10 |
| | |
| 10:EXCHANGE [HASH(t.bigint_col)] |
| | |
| 05:AGGREGATE [STREAMING] |
| | output: count(int_col), count:merge(smallint_col) |
| | group by: t.bigint_col |
| | row-size=24B cardinality=60 |
| | |
| 09:AGGREGATE |
| | output: count:merge(smallint_col) |
| | group by: t.bigint_col, int_col |
| | row-size=20B cardinality=400 |
| | |
| 08:EXCHANGE [HASH(t.bigint_col,int_col)] |
| | |
| 04:AGGREGATE [STREAMING] |
| | output: count(smallint_col) |
| | group by: bigint_col, int_col |
| | row-size=20B cardinality=1.20K |
| | |
| 07:AGGREGATE [FINALIZE] |
| | group by: id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col, timestamp_col, year, month |
| | row-size=89B cardinality=7.40K |
| | |
| 06:EXCHANGE [HASH(id,bool_col,tinyint_col,smallint_col,int_col,bigint_col,float_col,double_col,date_string_col,string_col,timestamp_col,year,month)] |
| | |
| 03:AGGREGATE [STREAMING] |
| | group by: id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col, timestamp_col, year, month |
| | row-size=89B cardinality=7.40K |
| | |
| 00:UNION |
| | pass-through-operands: all |
| | row-size=89B cardinality=7.40K |
| | |
| |--02:SCAN HDFS [functional.alltypessmall] |
| | HDFS partitions=4/4 files=4 size=6.32KB |
| | row-size=89B cardinality=100 |
| | |
| 01:SCAN HDFS [functional.alltypes] |
| HDFS partitions=24/24 files=24 size=478.45KB |
| row-size=89B cardinality=7.30K |
| ==== |
| # Mixed distinct and non-distinct agg with intermediate type different from input type |
| # Regression test for IMPALA-5251 to exercise validateMergeAggFn() in FunctionCallExpr. |
| select avg(l_quantity), ndv(l_discount), count(distinct l_partkey) |
| from tpch_parquet.lineitem; |
| ---- PLAN |
| PLAN-ROOT SINK |
| | |
| 02:AGGREGATE [FINALIZE] |
| | output: count(l_partkey), avg:merge(l_quantity), ndv:merge(l_discount) |
| | row-size=24B cardinality=1 |
| | |
| 01:AGGREGATE |
| | output: avg(l_quantity), ndv(l_discount) |
| | group by: l_partkey |
| | row-size=24B cardinality=200.52K |
| | |
| 00:SCAN HDFS [tpch_parquet.lineitem] |
| HDFS partitions=1/1 files=3 size=193.99MB |
| row-size=24B cardinality=6.00M |
| ---- DISTRIBUTEDPLAN |
| PLAN-ROOT SINK |
| | |
| 06:AGGREGATE [FINALIZE] |
| | output: count:merge(l_partkey), avg:merge(l_quantity), ndv:merge(l_discount) |
| | row-size=24B cardinality=1 |
| | |
| 05:EXCHANGE [UNPARTITIONED] |
| | |
| 02:AGGREGATE |
| | output: count(l_partkey), avg:merge(l_quantity), ndv:merge(l_discount) |
| | row-size=24B cardinality=1 |
| | |
| 04:AGGREGATE |
| | output: avg:merge(l_quantity), ndv:merge(l_discount) |
| | group by: l_partkey |
| | row-size=24B cardinality=200.52K |
| | |
| 03:EXCHANGE [HASH(l_partkey)] |
| | |
| 01:AGGREGATE [STREAMING] |
| | output: avg(l_quantity), ndv(l_discount) |
| | group by: l_partkey |
| | row-size=24B cardinality=601.52K |
| | |
| 00:SCAN HDFS [tpch_parquet.lineitem] |
| HDFS partitions=1/1 files=3 size=193.99MB |
| row-size=24B cardinality=6.00M |
| ==== |
| # test that aggregations are not placed below an unpartitioned exchange with a limit |
| select count(*) from (select * from functional.alltypes limit 10) t |
| ---- PLAN |
| PLAN-ROOT SINK |
| | |
| 01:AGGREGATE [FINALIZE] |
| | output: count(*) |
| | row-size=8B cardinality=1 |
| | |
| 00:SCAN HDFS [functional.alltypes] |
| HDFS partitions=24/24 files=24 size=478.45KB |
| limit: 10 |
| row-size=0B cardinality=10 |
| ---- DISTRIBUTEDPLAN |
| PLAN-ROOT SINK |
| | |
| 01:AGGREGATE [FINALIZE] |
| | output: count(*) |
| | row-size=8B cardinality=1 |
| | |
| 02:EXCHANGE [UNPARTITIONED] |
| | limit: 10 |
| | |
| 00:SCAN HDFS [functional.alltypes] |
| HDFS partitions=24/24 files=24 size=478.45KB |
| limit: 10 |
| row-size=0B cardinality=10 |
| ==== |
# test that aggregations are not placed below an unpartitioned exchange with a limit (union variant of the previous test)
| select count(*) from |
| (select * from functional.alltypes |
| union all |
| (select * from functional.alltypessmall) limit 10) t |
| ---- PLAN |
| PLAN-ROOT SINK |
| | |
| 03:AGGREGATE [FINALIZE] |
| | output: count(*) |
| | row-size=8B cardinality=1 |
| | |
| 00:UNION |
| | pass-through-operands: all |
| | limit: 10 |
| | row-size=0B cardinality=10 |
| | |
| |--02:SCAN HDFS [functional.alltypessmall] |
| | HDFS partitions=4/4 files=4 size=6.32KB |
| | row-size=0B cardinality=100 |
| | |
| 01:SCAN HDFS [functional.alltypes] |
| HDFS partitions=24/24 files=24 size=478.45KB |
| row-size=0B cardinality=7.30K |
| ---- DISTRIBUTEDPLAN |
| PLAN-ROOT SINK |
| | |
| 03:AGGREGATE [FINALIZE] |
| | output: count(*) |
| | row-size=8B cardinality=1 |
| | |
| 04:EXCHANGE [UNPARTITIONED] |
| | limit: 10 |
| | |
| 00:UNION |
| | pass-through-operands: all |
| | limit: 10 |
| | row-size=0B cardinality=10 |
| | |
| |--02:SCAN HDFS [functional.alltypessmall] |
| | HDFS partitions=4/4 files=4 size=6.32KB |
| | row-size=0B cardinality=100 |
| | |
| 01:SCAN HDFS [functional.alltypes] |
| HDFS partitions=24/24 files=24 size=478.45KB |
| row-size=0B cardinality=7.30K |
| ==== |
| # test that limits are applied at the top-level merge aggregation node for non-grouping |
| # distinct aggregation (IMPALA-1802) |
| select * from ( |
| select count(distinct cnt) from |
| (select count(distinct t1.id) as cnt |
| from functional.alltypesagg t1 join functional.alltypestiny t2 on t1.id = t2.id |
| limit 10) t |
| limit 2) v |
| limit 1 |
| ---- PLAN |
| PLAN-ROOT SINK |
| | |
| 06:AGGREGATE [FINALIZE] |
| | output: count(cnt) |
| | limit: 1 |
| | row-size=8B cardinality=1 |
| | |
| 05:AGGREGATE |
| | group by: count(t1.id) |
| | row-size=8B cardinality=1 |
| | |
| 04:AGGREGATE [FINALIZE] |
| | output: count(t1.id) |
| | limit: 10 |
| | row-size=8B cardinality=1 |
| | |
| 03:AGGREGATE |
| | group by: t1.id |
| | row-size=4B cardinality=9 |
| | |
| 02:HASH JOIN [INNER JOIN] |
| | hash predicates: t1.id = t2.id |
| | runtime filters: RF000 <- t2.id |
| | row-size=8B cardinality=9 |
| | |
| |--01:SCAN HDFS [functional.alltypestiny t2] |
| | HDFS partitions=4/4 files=4 size=460B |
| | row-size=4B cardinality=8 |
| | |
| 00:SCAN HDFS [functional.alltypesagg t1] |
| HDFS partitions=11/11 files=11 size=814.73KB |
| runtime filters: RF000 -> t1.id |
| row-size=4B cardinality=11.00K |
| ---- DISTRIBUTEDPLAN |
| PLAN-ROOT SINK |
| | |
| 06:AGGREGATE [FINALIZE] |
| | output: count(cnt) |
| | limit: 1 |
| | row-size=8B cardinality=1 |
| | |
| 05:AGGREGATE |
| | group by: count(t1.id) |
| | row-size=8B cardinality=1 |
| | |
| 11:AGGREGATE [FINALIZE] |
| | output: count:merge(t1.id) |
| | limit: 10 |
| | row-size=8B cardinality=1 |
| | |
| 10:EXCHANGE [UNPARTITIONED] |
| | |
| 04:AGGREGATE |
| | output: count(t1.id) |
| | row-size=8B cardinality=1 |
| | |
| 09:AGGREGATE |
| | group by: t1.id |
| | row-size=4B cardinality=9 |
| | |
| 08:EXCHANGE [HASH(t1.id)] |
| | |
| 03:AGGREGATE [STREAMING] |
| | group by: t1.id |
| | row-size=4B cardinality=9 |
| | |
| 02:HASH JOIN [INNER JOIN, BROADCAST] |
| | hash predicates: t1.id = t2.id |
| | runtime filters: RF000 <- t2.id |
| | row-size=8B cardinality=9 |
| | |
| |--07:EXCHANGE [BROADCAST] |
| | | |
| | 01:SCAN HDFS [functional.alltypestiny t2] |
| | HDFS partitions=4/4 files=4 size=460B |
| | row-size=4B cardinality=8 |
| | |
| 00:SCAN HDFS [functional.alltypesagg t1] |
| HDFS partitions=11/11 files=11 size=814.73KB |
| runtime filters: RF000 -> t1.id |
| row-size=4B cardinality=11.00K |
| ==== |
| # IMPALA-2089: Tests correct elimination of redundant predicates. |
| # The equivalences between inline-view slots are enforced inside the inline-view plan. |
| # Equivalences between simple grouping slots (with SlotRef grouping exprs) are enforced |
| # at the scan, and equivalences between grouping slots with complex grouping exprs are |
| # enforced at the aggregation. |
| # a, b, c, d are in the same equivalence class and some predicates are redundant. |
| select * from |
| (select tinyint_col a, smallint_col b, int_col + int_col c, coalesce(bigint_col, year) d |
| from functional.alltypes |
| group by 1, 2, 3, 4) v |
| where v.a = v.b and v.b = v.c and v.c = v.d and v.a = v.c and v.a = v.d |
| ---- PLAN |
| PLAN-ROOT SINK |
| | |
| 01:AGGREGATE [FINALIZE] |
| | group by: tinyint_col, smallint_col, int_col + int_col, coalesce(bigint_col, `year`) |
| | having: int_col + int_col = coalesce(bigint_col, `year`), smallint_col = int_col + int_col |
| | row-size=19B cardinality=73 |
| | |
| 00:SCAN HDFS [functional.alltypes] |
| HDFS partitions=24/24 files=24 size=478.45KB |
| predicates: functional.alltypes.tinyint_col = functional.alltypes.smallint_col |
| row-size=19B cardinality=730 |
| ==== |
| # IMPALA-1917: Test NULL literals inside inline view with grouping aggregation. |
| select cnt from |
| (select bool_col, count(*) cnt, cast(NULL as int) as x, cast(NULL as int) as y |
| from functional.alltypestiny |
| group by bool_col, x) v |
| ---- PLAN |
| PLAN-ROOT SINK |
| | |
| 01:AGGREGATE [FINALIZE] |
| | output: count(*) |
| | group by: bool_col, CAST(NULL AS INT) |
| | row-size=13B cardinality=2 |
| | |
| 00:SCAN HDFS [functional.alltypestiny] |
| HDFS partitions=4/4 files=4 size=460B |
| row-size=1B cardinality=8 |
| ==== |
| # IMPALA-1917: Test NULL literals inside inline view with grouping aggregation. |
| select cnt from |
| (select bool_col, count(distinct int_col) cnt, NULL as x, NULL as y |
| from functional.alltypestiny |
| group by bool_col, x) v |
| ---- PLAN |
| PLAN-ROOT SINK |
| | |
| 02:AGGREGATE [FINALIZE] |
| | output: count(int_col) |
| | group by: bool_col, NULL |
| | row-size=10B cardinality=2 |
| | |
| 01:AGGREGATE |
| | group by: bool_col, NULL, int_col |
| | row-size=6B cardinality=4 |
| | |
| 00:SCAN HDFS [functional.alltypestiny] |
| HDFS partitions=4/4 files=4 size=460B |
| row-size=5B cardinality=8 |
| ==== |
| # test simple group_concat with distinct |
| select group_concat(distinct string_col) from functional.alltypesagg |
| ---- PLAN |
| PLAN-ROOT SINK |
| | |
| 02:AGGREGATE [FINALIZE] |
| | output: group_concat(string_col) |
| | row-size=12B cardinality=1 |
| | |
| 01:AGGREGATE |
| | group by: string_col |
| | row-size=15B cardinality=963 |
| | |
| 00:SCAN HDFS [functional.alltypesagg] |
| HDFS partitions=11/11 files=11 size=814.73KB |
| row-size=15B cardinality=11.00K |
| ---- DISTRIBUTEDPLAN |
| PLAN-ROOT SINK |
| | |
| 06:AGGREGATE [FINALIZE] |
| | output: group_concat:merge(string_col) |
| | row-size=12B cardinality=1 |
| | |
| 05:EXCHANGE [UNPARTITIONED] |
| | |
| 02:AGGREGATE |
| | output: group_concat(string_col) |
| | row-size=12B cardinality=1 |
| | |
| 04:AGGREGATE |
| | group by: string_col |
| | row-size=15B cardinality=963 |
| | |
| 03:EXCHANGE [HASH(string_col)] |
| | |
| 01:AGGREGATE [STREAMING] |
| | group by: string_col |
| | row-size=15B cardinality=2.83K |
| | |
| 00:SCAN HDFS [functional.alltypesagg] |
| HDFS partitions=11/11 files=11 size=814.73KB |
| row-size=15B cardinality=11.00K |
| ==== |
| # test group_concat and a group by |
| select day, group_concat(distinct string_col) |
| from (select * from functional.alltypesagg where id % 100 = day order by id limit 99999) a |
| group by day |
| ---- PLAN |
| PLAN-ROOT SINK |
| | |
| 03:AGGREGATE [FINALIZE] |
| | output: group_concat(string_col) |
| | group by: `day` |
| | row-size=16B cardinality=11 |
| | |
| 02:AGGREGATE |
| | group by: day, string_col |
| | row-size=19B cardinality=1.10K |
| | |
| 01:TOP-N [LIMIT=99999] |
| | order by: id ASC |
| | row-size=23B cardinality=1.10K |
| | |
| 00:SCAN HDFS [functional.alltypesagg] |
| HDFS partitions=11/11 files=11 size=814.73KB |
| predicates: `day` = id % 100 |
| row-size=23B cardinality=1.10K |
| ---- DISTRIBUTEDPLAN |
| PLAN-ROOT SINK |
| | |
| 03:AGGREGATE [FINALIZE] |
| | output: group_concat(string_col) |
| | group by: `day` |
| | row-size=16B cardinality=11 |
| | |
| 02:AGGREGATE |
| | group by: day, string_col |
| | row-size=19B cardinality=1.10K |
| | |
| 04:MERGING-EXCHANGE [UNPARTITIONED] |
| | order by: id ASC |
| | limit: 99999 |
| | |
| 01:TOP-N [LIMIT=99999] |
| | order by: id ASC |
| | row-size=23B cardinality=1.10K |
| | |
| 00:SCAN HDFS [functional.alltypesagg] |
| HDFS partitions=11/11 files=11 size=814.73KB |
| predicates: `day` = id % 100 |
| row-size=23B cardinality=1.10K |
| ==== |
| # test group_concat with distinct together with another distinct aggregate function |
| select count(distinct cast(timestamp_col as string)), |
| group_concat(distinct cast(timestamp_col as string)) |
| from functional.alltypesagg group by year |
| ---- PLAN |
| PLAN-ROOT SINK |
| | |
| 02:AGGREGATE [FINALIZE] |
| | output: count(CAST(timestamp_col AS STRING)), group_concat(CAST(timestamp_col AS STRING)) |
| | group by: `year` |
| | row-size=24B cardinality=1 |
| | |
| 01:AGGREGATE |
| | group by: `year`, CAST(timestamp_col AS STRING) |
| | row-size=20B cardinality=10.21K |
| | |
| 00:SCAN HDFS [functional.alltypesagg] |
| HDFS partitions=11/11 files=11 size=814.73KB |
| row-size=20B cardinality=11.00K |
| ---- DISTRIBUTEDPLAN |
| PLAN-ROOT SINK |
| | |
| 07:EXCHANGE [UNPARTITIONED] |
| | |
| 06:AGGREGATE [FINALIZE] |
| | output: count:merge(CAST(timestamp_col AS STRING)), group_concat:merge(CAST(timestamp_col AS STRING)) |
| | group by: `year` |
| | row-size=24B cardinality=1 |
| | |
| 05:EXCHANGE [HASH(`year`)] |
| | |
| 02:AGGREGATE [STREAMING] |
| | output: count(CAST(timestamp_col AS STRING)), group_concat(CAST(timestamp_col AS STRING)) |
| | group by: `year` |
| | row-size=24B cardinality=3 |
| | |
| 04:AGGREGATE |
| | group by: `year`, CAST(timestamp_col AS STRING) |
| | row-size=20B cardinality=9.24K |
| | |
| 03:EXCHANGE [HASH(`year`,CAST(timestamp_col AS STRING))] |
| | |
| 01:AGGREGATE [STREAMING] |
| | group by: `year`, CAST(timestamp_col AS STRING) |
| | row-size=20B cardinality=9.24K |
| | |
| 00:SCAN HDFS [functional.alltypesagg] |
| HDFS partitions=11/11 files=11 size=814.73KB |
| row-size=20B cardinality=11.00K |
| ==== |
| # test group_concat distinct with other non-distinct aggregate functions |
| select group_concat(distinct string_col), count(*) from functional.alltypesagg |
| ---- PLAN |
| PLAN-ROOT SINK |
| | |
| 02:AGGREGATE [FINALIZE] |
| | output: group_concat(string_col), count:merge(*) |
| | row-size=20B cardinality=1 |
| | |
| 01:AGGREGATE |
| | output: count(*) |
| | group by: string_col |
| | row-size=23B cardinality=963 |
| | |
| 00:SCAN HDFS [functional.alltypesagg] |
| HDFS partitions=11/11 files=11 size=814.73KB |
| row-size=15B cardinality=11.00K |
| ---- DISTRIBUTEDPLAN |
| PLAN-ROOT SINK |
| | |
| 06:AGGREGATE [FINALIZE] |
| | output: group_concat:merge(string_col), count:merge(*) |
| | row-size=20B cardinality=1 |
| | |
| 05:EXCHANGE [UNPARTITIONED] |
| | |
| 02:AGGREGATE |
| | output: group_concat(string_col), count:merge(*) |
| | row-size=20B cardinality=1 |
| | |
| 04:AGGREGATE |
| | output: count:merge(*) |
| | group by: string_col |
| | row-size=23B cardinality=963 |
| | |
| 03:EXCHANGE [HASH(string_col)] |
| | |
| 01:AGGREGATE [STREAMING] |
| | output: count(*) |
| | group by: string_col |
| | row-size=23B cardinality=2.83K |
| | |
| 00:SCAN HDFS [functional.alltypesagg] |
| HDFS partitions=11/11 files=11 size=814.73KB |
| row-size=15B cardinality=11.00K |
| ==== |
| # test group_concat distinct with other aggregate functions, with custom separator |
| select group_concat(distinct string_col, '-'), sum(int_col), count(distinct string_col) |
| from functional.alltypesagg |
| ---- PLAN |
| PLAN-ROOT SINK |
| | |
| 02:AGGREGATE [FINALIZE] |
| | output: group_concat(string_col, '-'), count(string_col), sum:merge(int_col) |
| | row-size=28B cardinality=1 |
| | |
| 01:AGGREGATE |
| | output: sum(int_col) |
| | group by: string_col |
| | row-size=23B cardinality=963 |
| | |
| 00:SCAN HDFS [functional.alltypesagg] |
| HDFS partitions=11/11 files=11 size=814.73KB |
| row-size=19B cardinality=11.00K |
| ---- DISTRIBUTEDPLAN |
| PLAN-ROOT SINK |
| | |
| 06:AGGREGATE [FINALIZE] |
| | output: group_concat:merge(string_col, '-'), count:merge(string_col), sum:merge(int_col) |
| | row-size=28B cardinality=1 |
| | |
| 05:EXCHANGE [UNPARTITIONED] |
| | |
| 02:AGGREGATE |
| | output: group_concat(string_col, '-'), count(string_col), sum:merge(int_col) |
| | row-size=28B cardinality=1 |
| | |
| 04:AGGREGATE |
| | output: sum:merge(int_col) |
| | group by: string_col |
| | row-size=23B cardinality=963 |
| | |
| 03:EXCHANGE [HASH(string_col)] |
| | |
| 01:AGGREGATE [STREAMING] |
| | output: sum(int_col) |
| | group by: string_col |
| | row-size=23B cardinality=2.83K |
| | |
| 00:SCAN HDFS [functional.alltypesagg] |
| HDFS partitions=11/11 files=11 size=814.73KB |
| row-size=19B cardinality=11.00K |
| ==== |
| # test group_concat distinct with other aggregate functions, with custom separator |
| # and a group by |
| select month, year, count(*), count(distinct date_string_col), |
| group_concat(distinct date_string_col, '-') from functional.alltypesagg |
| group by month, year |
| ---- PLAN |
| PLAN-ROOT SINK |
| | |
| 02:AGGREGATE [FINALIZE] |
| | output: count(date_string_col), group_concat(date_string_col, '-'), count:merge(*) |
| | group by: `month`, `year` |
| | row-size=36B cardinality=1 |
| | |
| 01:AGGREGATE |
| | output: count(*) |
| | group by: `month`, `year`, date_string_col |
| | row-size=36B cardinality=10 |
| | |
| 00:SCAN HDFS [functional.alltypesagg] |
| HDFS partitions=11/11 files=11 size=814.73KB |
| row-size=28B cardinality=11.00K |
| ---- DISTRIBUTEDPLAN |
| PLAN-ROOT SINK |
| | |
| 07:EXCHANGE [UNPARTITIONED] |
| | |
| 06:AGGREGATE [FINALIZE] |
| | output: count:merge(date_string_col), group_concat:merge(date_string_col, '-'), count:merge(*) |
| | group by: `month`, `year` |
| | row-size=36B cardinality=1 |
| | |
| 05:EXCHANGE [HASH(`month`,`year`)] |
| | |
| 02:AGGREGATE [STREAMING] |
| | output: count(date_string_col), group_concat(date_string_col, '-'), count:merge(*) |
| | group by: `month`, `year` |
| | row-size=36B cardinality=3 |
| | |
| 04:AGGREGATE |
| | output: count:merge(*) |
| | group by: `month`, `year`, date_string_col |
| | row-size=36B cardinality=10 |
| | |
| 03:EXCHANGE [HASH(`month`,`year`,date_string_col)] |
| | |
| 01:AGGREGATE [STREAMING] |
| | output: count(*) |
| | group by: `month`, `year`, date_string_col |
| | row-size=36B cardinality=30 |
| | |
| 00:SCAN HDFS [functional.alltypesagg] |
| HDFS partitions=11/11 files=11 size=814.73KB |
| row-size=28B cardinality=11.00K |
| ==== |
| # test multiple group_concat distinct, each with a different separator |
| select group_concat(distinct string_col), group_concat(distinct string_col, '-'), |
| group_concat(distinct string_col, '---') from functional.alltypesagg |
| ---- PLAN |
| PLAN-ROOT SINK |
| | |
| 02:AGGREGATE [FINALIZE] |
| | output: group_concat(string_col), group_concat(string_col, '-'), group_concat(string_col, '---') |
| | row-size=36B cardinality=1 |
| | |
| 01:AGGREGATE |
| | group by: string_col |
| | row-size=15B cardinality=963 |
| | |
| 00:SCAN HDFS [functional.alltypesagg] |
| HDFS partitions=11/11 files=11 size=814.73KB |
| row-size=15B cardinality=11.00K |
| ---- DISTRIBUTEDPLAN |
| PLAN-ROOT SINK |
| | |
| 06:AGGREGATE [FINALIZE] |
| | output: group_concat:merge(string_col), group_concat:merge(string_col, '-'), group_concat:merge(string_col, '---') |
| | row-size=36B cardinality=1 |
| | |
| 05:EXCHANGE [UNPARTITIONED] |
| | |
| 02:AGGREGATE |
| | output: group_concat(string_col), group_concat(string_col, '-'), group_concat(string_col, '---') |
| | row-size=36B cardinality=1 |
| | |
| 04:AGGREGATE |
| | group by: string_col |
| | row-size=15B cardinality=963 |
| | |
| 03:EXCHANGE [HASH(string_col)] |
| | |
| 01:AGGREGATE [STREAMING] |
| | group by: string_col |
| | row-size=15B cardinality=2.83K |
| | |
| 00:SCAN HDFS [functional.alltypesagg] |
| HDFS partitions=11/11 files=11 size=814.73KB |
| row-size=15B cardinality=11.00K |
| ==== |
| # IMPALA-852: Aggregation only in the HAVING clause. |
| select 1 from functional.alltypestiny having count(*) > 0 |
| ---- PLAN |
| PLAN-ROOT SINK |
| | |
| 01:AGGREGATE [FINALIZE] |
| | output: count(*) |
| | having: count(*) > 0 |
| | row-size=8B cardinality=1 |
| | |
| 00:SCAN HDFS [functional.alltypestiny] |
| HDFS partitions=4/4 files=4 size=460B |
| row-size=0B cardinality=8 |
| ==== |
| # Grouping aggregation where input is partitioned on grouping expr. |
| # Planner should not redundantly repartition the data that was already partitioned on |
| # the required key by the join. |
| select straight_join c_custkey, count(*) |
| from tpch_parquet.customer inner join [shuffle] tpch_parquet.orders on c_custkey = o_custkey |
| where c_nationkey = 16 |
| group by 1 |
| having count(*) < 150000 |
| limit 1000000 |
| ---- DISTRIBUTEDPLAN |
| PLAN-ROOT SINK |
| | |
| 06:EXCHANGE [UNPARTITIONED] |
| | limit: 1000000 |
| | |
| 03:AGGREGATE [FINALIZE] |
| | output: count(*) |
| | group by: c_custkey |
| | having: count(*) < 150000 |
| | limit: 1000000 |
| | row-size=16B cardinality=9.15K |
| | |
| 02:HASH JOIN [INNER JOIN, PARTITIONED] |
| | hash predicates: c_custkey = o_custkey |
| | runtime filters: RF000 <- o_custkey |
| | row-size=18B cardinality=91.47K |
| | |
| |--05:EXCHANGE [HASH(o_custkey)] |
| | | |
| | 01:SCAN HDFS [tpch_parquet.orders] |
| | HDFS partitions=1/1 files=2 size=54.21MB |
| | row-size=8B cardinality=1.50M |
| | |
| 04:EXCHANGE [HASH(c_custkey)] |
| | |
| 00:SCAN HDFS [tpch_parquet.customer] |
| HDFS partitions=1/1 files=1 size=12.34MB |
| predicates: c_nationkey = 16 |
| runtime filters: RF000 -> c_custkey |
| row-size=10B cardinality=6.00K |
| ==== |
| # Distinct aggregation where input is partitioned on distinct expr. |
| # Planner should not redundantly repartition the data that was already partitioned on |
| # the required key by the join. |
| select col from ( |
| select straight_join count(distinct c_custkey) col |
| from tpch_parquet.orders inner join [shuffle] tpch_parquet.customer on c_custkey = o_custkey) v |
| where col > 50 |
| limit 50 |
| ---- DISTRIBUTEDPLAN |
| PLAN-ROOT SINK |
| | |
| 08:AGGREGATE [FINALIZE] |
| | output: count:merge(c_custkey) |
| | having: count(c_custkey) > 50 |
| | limit: 50 |
| | row-size=8B cardinality=1 |
| | |
| 07:EXCHANGE [UNPARTITIONED] |
| | |
| 04:AGGREGATE |
| | output: count(c_custkey) |
| | row-size=8B cardinality=1 |
| | |
| 03:AGGREGATE |
| | group by: c_custkey |
| | row-size=8B cardinality=150.00K |
| | |
| 02:HASH JOIN [INNER JOIN, PARTITIONED] |
| | hash predicates: o_custkey = c_custkey |
| | runtime filters: RF000 <- c_custkey |
| | row-size=16B cardinality=1.50M |
| | |
| |--06:EXCHANGE [HASH(c_custkey)] |
| | | |
| | 01:SCAN HDFS [tpch_parquet.customer] |
| | HDFS partitions=1/1 files=1 size=12.34MB |
| | row-size=8B cardinality=150.00K |
| | |
| 05:EXCHANGE [HASH(o_custkey)] |
| | |
| 00:SCAN HDFS [tpch_parquet.orders] |
| HDFS partitions=1/1 files=2 size=54.21MB |
| runtime filters: RF000 -> o_custkey |
| row-size=8B cardinality=1.50M |
| ==== |
| # Distinct grouping aggregation where input is partitioned on distinct and grouping exprs. |
| # Planner should not redundantly repartition the data that was already partitioned on |
| # the required key by the join. |
| select straight_join c_custkey, count(distinct c_custkey) |
| from tpch_parquet.orders inner join [shuffle] tpch_parquet.customer on c_custkey = o_custkey |
| group by 1 |
| ---- DISTRIBUTEDPLAN |
| PLAN-ROOT SINK |
| | |
| 09:EXCHANGE [UNPARTITIONED] |
| | |
| 08:AGGREGATE [FINALIZE] |
| | output: count:merge(c_custkey) |
| | group by: c_custkey |
| | row-size=16B cardinality=150.00K |
| | |
| 07:EXCHANGE [HASH(c_custkey)] |
| | |
| 04:AGGREGATE [STREAMING] |
| | output: count(c_custkey) |
| | group by: c_custkey |
| | row-size=16B cardinality=150.00K |
| | |
| 03:AGGREGATE |
| | group by: c_custkey, c_custkey |
| | row-size=16B cardinality=150.00K |
| | |
| 02:HASH JOIN [INNER JOIN, PARTITIONED] |
| | hash predicates: o_custkey = c_custkey |
| | runtime filters: RF000 <- c_custkey |
| | row-size=16B cardinality=1.50M |
| | |
| |--06:EXCHANGE [HASH(c_custkey)] |
| | | |
| | 01:SCAN HDFS [tpch_parquet.customer] |
| | HDFS partitions=1/1 files=1 size=12.34MB |
| | row-size=8B cardinality=150.00K |
| | |
| 05:EXCHANGE [HASH(o_custkey)] |
| | |
| 00:SCAN HDFS [tpch_parquet.orders] |
| HDFS partitions=1/1 files=2 size=54.21MB |
| runtime filters: RF000 -> o_custkey |
| row-size=8B cardinality=1.50M |
| ==== |
| # Complex aggregation when two joins and an agg end up in same fragment. |
| select l_orderkey, l_returnflag, count(*) from ( |
| select straight_join * |
| from tpch_parquet.lineitem |
| inner join [shuffle] tpch_parquet.orders |
| on l_orderkey = o_orderkey and l_returnflag = o_clerk |
| inner join [broadcast] tpch_parquet.customer |
| on o_custkey = c_custkey and c_phone = o_comment |
| ) v |
| group by 1, 2 |
| having count(*) > 10 |
| limit 10 |
| ---- DISTRIBUTEDPLAN |
| PLAN-ROOT SINK |
| | |
| 09:EXCHANGE [UNPARTITIONED] |
| | limit: 10 |
| | |
| 05:AGGREGATE [FINALIZE] |
| | output: count(*) |
| | group by: tpch_parquet.lineitem.l_orderkey, tpch_parquet.lineitem.l_returnflag |
| | having: count(*) > 10 |
| | limit: 10 |
| | row-size=29B cardinality=10 |
| | |
| 04:HASH JOIN [INNER JOIN, BROADCAST] |
| | hash predicates: o_custkey = c_custkey, o_comment = c_phone |
| | runtime filters: RF000 <- c_custkey, RF001 <- c_phone |
| | row-size=160B cardinality=607.19K |
| | |
| |--08:EXCHANGE [BROADCAST] |
| | | |
| | 02:SCAN HDFS [tpch_parquet.customer] |
| | HDFS partitions=1/1 files=1 size=12.34MB |
| | row-size=35B cardinality=150.00K |
| | |
| 03:HASH JOIN [INNER JOIN, PARTITIONED] |
| | hash predicates: l_orderkey = o_orderkey, l_returnflag = o_clerk |
| | runtime filters: RF004 <- o_orderkey, RF005 <- o_clerk |
| | row-size=125B cardinality=5.76M |
| | |
| |--07:EXCHANGE [HASH(o_orderkey,o_clerk)] |
| | | |
| | 01:SCAN HDFS [tpch_parquet.orders] |
| | HDFS partitions=1/1 files=2 size=54.21MB |
| | runtime filters: RF000 -> o_custkey, RF001 -> o_comment |
| | row-size=104B cardinality=1.50M |
| | |
| 06:EXCHANGE [HASH(l_orderkey,l_returnflag)] |
| | |
| 00:SCAN HDFS [tpch_parquet.lineitem] |
| HDFS partitions=1/1 files=3 size=193.99MB |
| runtime filters: RF004 -> l_orderkey, RF005 -> l_returnflag |
| row-size=21B cardinality=5.76M(filtered from 6.00M) |
| ==== |
| # IMPALA-4263: Grouping agg needs a merge step because the grouping exprs reference a |
| # tuple that is made nullable in the join fragment. |
| select /* +straight_join */ t2.id, count(*) |
| from functional.alltypes t1 |
| left outer join /* +shuffle */ functional.alltypessmall t2 |
| on t1.id = t2.id |
| group by t2.id |
| ---- DISTRIBUTEDPLAN |
| PLAN-ROOT SINK |
| | |
| 08:EXCHANGE [UNPARTITIONED] |
| | |
| 07:AGGREGATE [FINALIZE] |
| | output: count:merge(*) |
| | group by: t2.id |
| | row-size=12B cardinality=99 |
| | |
| 06:EXCHANGE [HASH(t2.id)] |
| | |
| 03:AGGREGATE [STREAMING] |
| | output: count(*) |
| | group by: t2.id |
| | row-size=12B cardinality=297 |
| | |
| 02:HASH JOIN [LEFT OUTER JOIN, PARTITIONED] |
| | hash predicates: t1.id = t2.id |
| | row-size=8B cardinality=7.30K |
| | |
| |--05:EXCHANGE [HASH(t2.id)] |
| | | |
| | 01:SCAN HDFS [functional.alltypessmall t2] |
| | HDFS partitions=4/4 files=4 size=6.32KB |
| | row-size=4B cardinality=100 |
| | |
| 04:EXCHANGE [HASH(t1.id)] |
| | |
| 00:SCAN HDFS [functional.alltypes t1] |
| HDFS partitions=24/24 files=24 size=478.45KB |
| row-size=4B cardinality=7.30K |
| ==== |
| # IMPALA-4263: Grouping agg is placed in the join fragment and has no merge step. |
| select /* +straight_join */ t1.id, count(*) |
| from functional.alltypes t1 |
| left outer join /* +shuffle */ functional.alltypessmall t2 |
| on t1.id = t2.id |
| group by t1.id |
| ---- DISTRIBUTEDPLAN |
| PLAN-ROOT SINK |
| | |
| 06:EXCHANGE [UNPARTITIONED] |
| | |
| 03:AGGREGATE [FINALIZE] |
| | output: count(*) |
| | group by: t1.id |
| | row-size=12B cardinality=7.30K |
| | |
| 02:HASH JOIN [LEFT OUTER JOIN, PARTITIONED] |
| | hash predicates: t1.id = t2.id |
| | row-size=8B cardinality=7.30K |
| | |
| |--05:EXCHANGE [HASH(t2.id)] |
| | | |
| | 01:SCAN HDFS [functional.alltypessmall t2] |
| | HDFS partitions=4/4 files=4 size=6.32KB |
| | row-size=4B cardinality=100 |
| | |
| 04:EXCHANGE [HASH(t1.id)] |
| | |
| 00:SCAN HDFS [functional.alltypes t1] |
| HDFS partitions=24/24 files=24 size=478.45KB |
| row-size=4B cardinality=7.30K |
| ==== |
| # IMPALA-4263: Grouping agg is placed in the second join fragment and has no merge step. |
| # The grouping exprs reference a nullable tuple (t2), but that tuple is made nullable in |
| # the first join fragment, so it's correct to place the aggregation in the second |
| # join fragment without a merge step. |
| select /* +straight_join */ t2.id, count(*) |
| from functional.alltypes t1 |
| left outer join /* +shuffle */ functional.alltypessmall t2 |
| on t1.int_col = t2.int_col |
| left outer join /* +shuffle */ functional.alltypestiny t3 |
| on t2.id = t3.id |
| group by t2.id |
| ---- DISTRIBUTEDPLAN |
| PLAN-ROOT SINK |
| | |
| 10:EXCHANGE [UNPARTITIONED] |
| | |
| 05:AGGREGATE [FINALIZE] |
| | output: count(*) |
| | group by: t2.id |
| | row-size=12B cardinality=99 |
| | |
| 04:HASH JOIN [LEFT OUTER JOIN, PARTITIONED] |
| | hash predicates: t2.id = t3.id |
| | row-size=16B cardinality=73.00K |
| | |
| |--09:EXCHANGE [HASH(t3.id)] |
| | | |
| | 02:SCAN HDFS [functional.alltypestiny t3] |
| | HDFS partitions=4/4 files=4 size=460B |
| | row-size=4B cardinality=8 |
| | |
| 08:EXCHANGE [HASH(t2.id)] |
| | |
| 03:HASH JOIN [LEFT OUTER JOIN, PARTITIONED] |
| | hash predicates: t1.int_col = t2.int_col |
| | row-size=12B cardinality=73.00K |
| | |
| |--07:EXCHANGE [HASH(t2.int_col)] |
| | | |
| | 01:SCAN HDFS [functional.alltypessmall t2] |
| | HDFS partitions=4/4 files=4 size=6.32KB |
| | row-size=8B cardinality=100 |
| | |
| 06:EXCHANGE [HASH(t1.int_col)] |
| | |
| 00:SCAN HDFS [functional.alltypes t1] |
| HDFS partitions=24/24 files=24 size=478.45KB |
| row-size=4B cardinality=7.30K |
| ==== |
| # IMPALA-10096: use the original ordinal if the group by ordinal reference is a |
| # constant int |
| select 13, id, count(1) from functional.dimtbl group by 1, 2 |
| ---- PLAN |
| PLAN-ROOT SINK |
| | |
| 01:AGGREGATE [FINALIZE] |
| | output: count(*) |
| | group by: 13, id |
| | row-size=17B cardinality=10 |
| | |
| 00:SCAN HDFS [functional.dimtbl] |
| HDFS partitions=1/1 files=1 size=171B |
| row-size=8B cardinality=10 |
| ==== |
| # IMPALA-10096: use the original ordinal if the group by ordinal reference is a |
| # constant int |
| select -1, id, count(1) from functional.dimtbl group by 1, 2 |
| ---- PLAN |
| PLAN-ROOT SINK |
| | |
| 01:AGGREGATE [FINALIZE] |
| | output: count(*) |
| | group by: -1, id |
| | row-size=17B cardinality=10 |
| | |
| 00:SCAN HDFS [functional.dimtbl] |
| HDFS partitions=1/1 files=1 size=171B |
| row-size=8B cardinality=10 |
| ==== |
| # IMPALA-10096: use the original ordinal if the group by ordinal reference is a |
| # constant int |
| select 2, id, count(1) from functional.dimtbl group by 1, 2 |
| ---- PLAN |
| PLAN-ROOT SINK |
| | |
| 01:AGGREGATE [FINALIZE] |
| | output: count(*) |
| | group by: 2, id |
| | row-size=17B cardinality=10 |
| | |
| 00:SCAN HDFS [functional.dimtbl] |
| HDFS partitions=1/1 files=1 size=171B |
| row-size=8B cardinality=10 |
| ==== |
| # IMPALA-10096: use the original ordinal if the group by ordinal reference is a |
| # constant int |
| select 2 + 1, id, count(1) from functional.dimtbl group by 1, 2 |
| ---- PLAN |
| PLAN-ROOT SINK |
| | |
| 01:AGGREGATE [FINALIZE] |
| | output: count(*) |
| | group by: 3, id |
| | row-size=17B cardinality=10 |
| | |
| 00:SCAN HDFS [functional.dimtbl] |
| HDFS partitions=1/1 files=1 size=171B |
| row-size=8B cardinality=10 |
| ==== |
| # IMPALA-10865: Group by expr with column alias reference errors in |
| # re-analyze phase. |
| SELECT ss_item_sk ss_item_sk_group, ss_item_sk+300 ss_item_sk, |
| count(ss_ticket_number) |
| FROM tpcds.store_sales a |
| WHERE ss_sold_date_sk > cast('245263' AS INT) |
| GROUP BY ss_item_sk_group, |
| ss_item_sk |
| ---- PLAN |
| PLAN-ROOT SINK |
| | |
| 01:AGGREGATE [FINALIZE] |
| | output: count(ss_ticket_number) |
| | group by: ss_item_sk, ss_item_sk + 300 |
| | row-size=24B cardinality=2.75M |
| | |
| 00:SCAN HDFS [tpcds.store_sales a] |
| partition predicates: ss_sold_date_sk > 245263 |
| HDFS partitions=1823/1824 files=1823 size=336.51MB |
| row-size=16B cardinality=2.75M |
| ---- PARALLELPLANS |
| PLAN-ROOT SINK |
| | |
| 04:EXCHANGE [UNPARTITIONED] |
| | |
| 03:AGGREGATE [FINALIZE] |
| | output: count:merge(ss_ticket_number) |
| | group by: ss_item_sk, ss_item_sk + 300 |
| | row-size=24B cardinality=2.75M |
| | |
| 02:EXCHANGE [HASH(ss_item_sk,ss_item_sk + 300)] |
| | |
| 01:AGGREGATE [STREAMING] |
| | output: count(ss_ticket_number) |
| | group by: ss_item_sk, ss_item_sk + 300 |
| | row-size=24B cardinality=2.75M |
| | |
| 00:SCAN HDFS [tpcds.store_sales a] |
| partition predicates: ss_sold_date_sk > 245263 |
| HDFS partitions=1823/1824 files=1823 size=336.51MB |
| row-size=16B cardinality=2.75M |
| ==== |
| # Baseline for next similar test cases below that do 2-phase aggregation. |
| # The query output is: |
| # +----+----+----+ |
| # | bc | c1 | c2 | |
| # +----+----+----+ |
| # | 0 | 12 | 1 | |
| # | 10 | 12 | 1 | |
| # | 20 | 12 | 1 | |
| # | 30 | 12 | 1 | |
| # | 40 | 12 | 1 | |
| # | 50 | 8 | 1 | |
| # | 60 | 8 | 1 | |
| # | 70 | 8 | 1 | |
| # | 80 | 8 | 1 | |
| # | 90 | 8 | 1 | |
| # +----+----+----+ |
| select |
| bigint_col bc, count(smallint_col) c1, count(distinct int_col) c2 |
| from functional.alltypessmall |
| group by bigint_col |
| order by bc; |
| ---- PLAN |
| PLAN-ROOT SINK |
| | |
| 03:SORT |
| | order by: bigint_col ASC |
| | row-size=24B cardinality=10 |
| | |
| 02:AGGREGATE [FINALIZE] |
| | output: count(int_col), count:merge(smallint_col) |
| | group by: bigint_col |
| | row-size=24B cardinality=10 |
| | |
| 01:AGGREGATE |
| | output: count(smallint_col) |
| | group by: bigint_col, int_col |
| | row-size=20B cardinality=100 |
| | |
| 00:SCAN HDFS [functional.alltypessmall] |
| HDFS partitions=4/4 files=4 size=6.32KB |
| row-size=14B cardinality=100 |
| ---- PARALLELPLANS |
| PLAN-ROOT SINK |
| | |
| 08:MERGING-EXCHANGE [UNPARTITIONED] |
| | order by: bigint_col ASC |
| | |
| 03:SORT |
| | order by: bigint_col ASC |
| | row-size=24B cardinality=10 |
| | |
| 07:AGGREGATE [FINALIZE] |
| | output: count:merge(int_col), count:merge(smallint_col) |
| | group by: bigint_col |
| | row-size=24B cardinality=10 |
| | |
| 06:EXCHANGE [HASH(bigint_col)] |
| | |
| 02:AGGREGATE [STREAMING] |
| | output: count(int_col), count:merge(smallint_col) |
| | group by: bigint_col |
| | row-size=24B cardinality=40 |
| | |
| 05:AGGREGATE |
| | output: count:merge(smallint_col) |
| | group by: bigint_col, int_col |
| | row-size=20B cardinality=100 |
| | |
| 04:EXCHANGE [HASH(bigint_col,int_col)] |
| | |
| 01:AGGREGATE [STREAMING] |
| | output: count(smallint_col) |
| | group by: bigint_col, int_col |
| | row-size=20B cardinality=100 |
| | |
| 00:SCAN HDFS [functional.alltypessmall] |
| HDFS partitions=4/4 files=4 size=6.32KB |
| row-size=14B cardinality=100 |
| ==== |
| # Add limit 7. |
| select |
| bigint_col bc, count(smallint_col) c1, count(distinct int_col) c2 |
| from functional.alltypessmall |
| group by bigint_col |
| order by bc |
| limit 7; |
| ---- PLAN |
| PLAN-ROOT SINK |
| | |
| 03:TOP-N [LIMIT=7] |
| | order by: bigint_col ASC |
| | row-size=24B cardinality=7 |
| | |
| 02:AGGREGATE [FINALIZE] |
| | output: count(int_col), count:merge(smallint_col) |
| | group by: bigint_col |
| | row-size=24B cardinality=10 |
| | |
| 01:AGGREGATE |
| | output: count(smallint_col) |
| | group by: bigint_col, int_col |
| | row-size=20B cardinality=100 |
| | |
| 00:SCAN HDFS [functional.alltypessmall] |
| HDFS partitions=4/4 files=4 size=6.32KB |
| row-size=14B cardinality=100 |
| ---- PARALLELPLANS |
| PLAN-ROOT SINK |
| | |
| 08:MERGING-EXCHANGE [UNPARTITIONED] |
| | order by: bigint_col ASC |
| | limit: 7 |
| | |
| 03:TOP-N [LIMIT=7] |
| | order by: bigint_col ASC |
| | row-size=24B cardinality=7 |
| | |
| 07:AGGREGATE [FINALIZE] |
| | output: count:merge(int_col), count:merge(smallint_col) |
| | group by: bigint_col |
| | row-size=24B cardinality=10 |
| | |
| 06:EXCHANGE [HASH(bigint_col)] |
| | |
| 02:AGGREGATE [STREAMING] |
| | output: count(int_col), count:merge(smallint_col) |
| | group by: bigint_col |
| | row-size=24B cardinality=40 |
| | |
| 05:AGGREGATE |
| | output: count:merge(smallint_col) |
| | group by: bigint_col, int_col |
| | row-size=20B cardinality=100 |
| | |
| 04:EXCHANGE [HASH(bigint_col,int_col)] |
| | |
| 01:AGGREGATE [STREAMING] |
| | output: count(smallint_col) |
| | group by: bigint_col, int_col |
| | row-size=20B cardinality=100 |
| | |
| 00:SCAN HDFS [functional.alltypessmall] |
| HDFS partitions=4/4 files=4 size=6.32KB |
| row-size=14B cardinality=100 |
| ==== |
| # Add equality HAVING predicate over grouping column. |
| # That HAVING predicate should be pushed down to scan. |
| select |
| bigint_col bc, count(smallint_col) c1, count(distinct int_col) c2 |
| from functional.alltypessmall |
| group by bigint_col |
| having bigint_col = 0 |
| order by bc; |
| ---- PLAN |
| PLAN-ROOT SINK |
| | |
| 03:SORT |
| | order by: bigint_col ASC |
| | row-size=24B cardinality=10 |
| | |
| 02:AGGREGATE [FINALIZE] |
| | output: count(int_col), count:merge(smallint_col) |
| | group by: bigint_col |
| | row-size=24B cardinality=10 |
| | |
| 01:AGGREGATE |
| | output: count(smallint_col) |
| | group by: bigint_col, int_col |
| | row-size=20B cardinality=10 |
| | |
| 00:SCAN HDFS [functional.alltypessmall] |
| HDFS partitions=4/4 files=4 size=6.32KB |
| predicates: functional.alltypessmall.bigint_col = 0 |
| row-size=14B cardinality=10 |
| ---- PARALLELPLANS |
| PLAN-ROOT SINK |
| | |
| 08:MERGING-EXCHANGE [UNPARTITIONED] |
| | order by: bigint_col ASC |
| | |
| 03:SORT |
| | order by: bigint_col ASC |
| | row-size=24B cardinality=10 |
| | |
| 07:AGGREGATE [FINALIZE] |
| | output: count:merge(int_col), count:merge(smallint_col) |
| | group by: bigint_col |
| | row-size=24B cardinality=10 |
| | |
| 06:EXCHANGE [HASH(bigint_col)] |
| | |
| 02:AGGREGATE [STREAMING] |
| | output: count(int_col), count:merge(smallint_col) |
| | group by: bigint_col |
| | row-size=24B cardinality=10 |
| | |
| 05:AGGREGATE |
| | output: count:merge(smallint_col) |
| | group by: bigint_col, int_col |
| | row-size=20B cardinality=10 |
| | |
| 04:EXCHANGE [HASH(bigint_col,int_col)] |
| | |
| 01:AGGREGATE [STREAMING] |
| | output: count(smallint_col) |
| | group by: bigint_col, int_col |
| | row-size=20B cardinality=10 |
| | |
| 00:SCAN HDFS [functional.alltypessmall] |
| HDFS partitions=4/4 files=4 size=6.32KB |
| predicates: functional.alltypessmall.bigint_col = 0 |
| row-size=14B cardinality=10 |
| ==== |
| # Add equality HAVING predicate over output expression. |
| # Actual cardinality of 02:AGGREGATE is 5. |
| # TODO: Apply the 10% default selectivity instead. |
| select |
| bigint_col bc, count(smallint_col) c1, count(distinct int_col) c2 |
| from functional.alltypessmall |
| group by bigint_col |
| having count(smallint_col) = 8 |
| order by bc; |
| ---- PLAN |
| PLAN-ROOT SINK |
| | |
| 03:SORT |
| | order by: bigint_col ASC |
| | row-size=24B cardinality=1 |
| | |
| 02:AGGREGATE [FINALIZE] |
| | output: count(int_col), count:merge(smallint_col) |
| | group by: bigint_col |
| | having: count(smallint_col) = 8 |
| | row-size=24B cardinality=1 |
| | |
| 01:AGGREGATE |
| | output: count(smallint_col) |
| | group by: bigint_col, int_col |
| | row-size=20B cardinality=100 |
| | |
| 00:SCAN HDFS [functional.alltypessmall] |
| HDFS partitions=4/4 files=4 size=6.32KB |
| row-size=14B cardinality=100 |
| ---- PARALLELPLANS |
| PLAN-ROOT SINK |
| | |
| 08:MERGING-EXCHANGE [UNPARTITIONED] |
| | order by: bigint_col ASC |
| | |
| 03:SORT |
| | order by: bigint_col ASC |
| | row-size=24B cardinality=1 |
| | |
| 07:AGGREGATE [FINALIZE] |
| | output: count:merge(int_col), count:merge(smallint_col) |
| | group by: bigint_col |
| | having: count(smallint_col) = 8 |
| | row-size=24B cardinality=1 |
| | |
| 06:EXCHANGE [HASH(bigint_col)] |
| | |
| 02:AGGREGATE [STREAMING] |
| | output: count(int_col), count:merge(smallint_col) |
| | group by: bigint_col |
| | row-size=24B cardinality=40 |
| | |
| 05:AGGREGATE |
| | output: count:merge(smallint_col) |
| | group by: bigint_col, int_col |
| | row-size=20B cardinality=100 |
| | |
| 04:EXCHANGE [HASH(bigint_col,int_col)] |
| | |
| 01:AGGREGATE [STREAMING] |
| | output: count(smallint_col) |
| | group by: bigint_col, int_col |
| | row-size=20B cardinality=100 |
| | |
| 00:SCAN HDFS [functional.alltypessmall] |
| HDFS partitions=4/4 files=4 size=6.32KB |
| row-size=14B cardinality=100 |
| ==== |
| # Add in-list HAVING predicate over output expression. |
| # Actual cardinality of 02:AGGREGATE is 5. |
| # TODO: Apply the 10% default selectivity instead. |
| select |
| bigint_col bc, count(smallint_col) c1, count(distinct int_col) c2 |
| from functional.alltypessmall |
| group by bigint_col |
| having count(smallint_col) in (7, 8, 9) |
| order by bc; |
| ---- PLAN |
| PLAN-ROOT SINK |
| | |
| 03:SORT |
| | order by: bigint_col ASC |
| | row-size=24B cardinality=3 |
| | |
| 02:AGGREGATE [FINALIZE] |
| | output: count(int_col), count:merge(smallint_col) |
| | group by: bigint_col |
| | having: count(smallint_col) IN (7, 8, 9) |
| | row-size=24B cardinality=3 |
| | |
| 01:AGGREGATE |
| | output: count(smallint_col) |
| | group by: bigint_col, int_col |
| | row-size=20B cardinality=100 |
| | |
| 00:SCAN HDFS [functional.alltypessmall] |
| HDFS partitions=4/4 files=4 size=6.32KB |
| row-size=14B cardinality=100 |
| ---- PARALLELPLANS |
| PLAN-ROOT SINK |
| | |
| 08:MERGING-EXCHANGE [UNPARTITIONED] |
| | order by: bigint_col ASC |
| | |
| 03:SORT |
| | order by: bigint_col ASC |
| | row-size=24B cardinality=3 |
| | |
| 07:AGGREGATE [FINALIZE] |
| | output: count:merge(int_col), count:merge(smallint_col) |
| | group by: bigint_col |
| | having: count(smallint_col) IN (7, 8, 9) |
| | row-size=24B cardinality=3 |
| | |
| 06:EXCHANGE [HASH(bigint_col)] |
| | |
| 02:AGGREGATE [STREAMING] |
| | output: count(int_col), count:merge(smallint_col) |
| | group by: bigint_col |
| | row-size=24B cardinality=40 |
| | |
| 05:AGGREGATE |
| | output: count:merge(smallint_col) |
| | group by: bigint_col, int_col |
| | row-size=20B cardinality=100 |
| | |
| 04:EXCHANGE [HASH(bigint_col,int_col)] |
| | |
| 01:AGGREGATE [STREAMING] |
| | output: count(smallint_col) |
| | group by: bigint_col, int_col |
| | row-size=20B cardinality=100 |
| | |
| 00:SCAN HDFS [functional.alltypessmall] |
| HDFS partitions=4/4 files=4 size=6.32KB |
| row-size=14B cardinality=100 |
| ==== |
| # Aggregation across nested views. The NDV of aggregation columns are following: |
| # ss_item_sk = 17975 |
| # ss_customer_sk = 90632 |
| # d_month_seq = 2421 |
| # numRows(store_sales) = 2880404 & numRows(date_dim) = 73049 |
| with v1 as ( |
| select ss_item_sk, ss_customer_sk, d_month_seq, count(*) |
| from tpcds.store_sales ss |
| inner join tpcds.date_dim dd on ss.ss_sold_date_sk = dd.d_date_sk |
| group by ss_item_sk, ss_customer_sk, d_month_seq), |
| v2 as ( |
| select ss_item_sk, ss_customer_sk, count(*) |
| from v1 group by ss_item_sk, ss_customer_sk) |
| select ss_item_sk, count(*) from v2 group by ss_item_sk; |
| ---- PLAN |
| PLAN-ROOT SINK |
| | |
| 05:AGGREGATE [FINALIZE] |
| | output: count(*) |
| | group by: ss_item_sk |
| | row-size=16B cardinality=17.98K |
| | |
| 04:AGGREGATE [FINALIZE] |
| | group by: ss_item_sk, ss_customer_sk |
| | row-size=12B cardinality=2.88M |
| | |
| 03:AGGREGATE [FINALIZE] |
| | group by: ss_item_sk, ss_customer_sk, d_month_seq |
| | row-size=16B cardinality=2.88M |
| | |
| 02:HASH JOIN [INNER JOIN] |
| | hash predicates: ss.ss_sold_date_sk = dd.d_date_sk |
| | runtime filters: RF000 <- dd.d_date_sk |
| | row-size=24B cardinality=2.88M |
| | |
| |--01:SCAN HDFS [tpcds.date_dim dd] |
| | HDFS partitions=1/1 files=1 size=9.84MB |
| | row-size=8B cardinality=73.05K |
| | |
| 00:SCAN HDFS [tpcds.store_sales ss] |
| HDFS partitions=1824/1824 files=1824 size=346.60MB |
| runtime filters: RF000 -> ss.ss_sold_date_sk |
| row-size=16B cardinality=2.88M |
| ---- PARALLELPLANS |
| PLAN-ROOT SINK |
| | |
| 13:EXCHANGE [UNPARTITIONED] |
| | |
| 12:AGGREGATE [FINALIZE] |
| | output: count:merge(*) |
| | group by: ss_item_sk |
| | row-size=16B cardinality=17.98K |
| | |
| 11:EXCHANGE [HASH(ss_item_sk)] |
| | |
| 05:AGGREGATE [STREAMING] |
| | output: count(*) |
| | group by: ss_item_sk |
| | row-size=16B cardinality=107.85K |
| | |
| 10:AGGREGATE [FINALIZE] |
| | group by: ss_item_sk, ss_customer_sk |
| | row-size=12B cardinality=2.88M |
| | |
| 09:EXCHANGE [HASH(ss_item_sk,ss_customer_sk)] |
| | |
| 04:AGGREGATE [STREAMING] |
| | group by: ss_item_sk, ss_customer_sk |
| | row-size=12B cardinality=2.88M |
| | |
| 08:AGGREGATE [FINALIZE] |
| | group by: ss_item_sk, ss_customer_sk, d_month_seq |
| | row-size=16B cardinality=2.88M |
| | |
| 07:EXCHANGE [HASH(ss_item_sk,ss_customer_sk,d_month_seq)] |
| | |
| 03:AGGREGATE [STREAMING] |
| | group by: ss_item_sk, ss_customer_sk, d_month_seq |
| | row-size=16B cardinality=2.88M |
| | |
| 02:HASH JOIN [INNER JOIN, BROADCAST] |
| | hash predicates: ss.ss_sold_date_sk = dd.d_date_sk |
| | row-size=24B cardinality=2.88M |
| | |
| |--JOIN BUILD |
| | | join-table-id=00 plan-id=01 cohort-id=01 |
| | | build expressions: dd.d_date_sk |
| | | runtime filters: RF000 <- dd.d_date_sk |
| | | |
| | 06:EXCHANGE [BROADCAST] |
| | | |
| | 01:SCAN HDFS [tpcds.date_dim dd] |
| | HDFS partitions=1/1 files=1 size=9.84MB |
| | row-size=8B cardinality=73.05K |
| | |
| 00:SCAN HDFS [tpcds.store_sales ss] |
| HDFS partitions=1824/1824 files=1824 size=346.60MB |
| runtime filters: RF000 -> ss.ss_sold_date_sk |
| row-size=16B cardinality=2.88M |
| ==== |
| # Aggregation across nested views where the grouping columns do not change. |
| # The NDVs of the aggregation columns are as follows: |
| # c_nationkey = 25 |
| # c_custkey = 150000 |
| with v1 as ( |
| select c_nationkey, c_custkey, count(*) |
| from tpch.customer |
| group by c_nationkey, c_custkey), |
| v2 as ( |
| select c_nationkey, c_custkey, count(*) |
| from v1, tpch.orders |
| where c_custkey = o_custkey |
| group by c_nationkey, c_custkey) |
| select c_nationkey, count(*) from v2 group by c_nationkey; |
| ---- PLAN |
| PLAN-ROOT SINK |
| | |
| 05:AGGREGATE [FINALIZE] |
| | output: count(*) |
| | group by: c_nationkey |
| | row-size=10B cardinality=25 |
| | |
| 04:AGGREGATE [FINALIZE] |
| | group by: c_nationkey, c_custkey |
| | row-size=10B cardinality=150.00K |
| | |
| 03:HASH JOIN [INNER JOIN] |
| | hash predicates: o_custkey = c_custkey |
| | runtime filters: RF000 <- c_custkey |
| | row-size=18B cardinality=1.50M |
| | |
| |--01:AGGREGATE [FINALIZE] |
| | | group by: c_nationkey, c_custkey |
| | | row-size=10B cardinality=150.00K |
| | | |
| | 00:SCAN HDFS [tpch.customer] |
| | HDFS partitions=1/1 files=1 size=23.08MB |
| | row-size=10B cardinality=150.00K |
| | |
| 02:SCAN HDFS [tpch.orders] |
| HDFS partitions=1/1 files=1 size=162.56MB |
| runtime filters: RF000 -> o_custkey |
| row-size=8B cardinality=1.50M |
| ---- PARALLELPLANS |
| PLAN-ROOT SINK |
| | |
| 13:EXCHANGE [UNPARTITIONED] |
| | |
| 12:AGGREGATE [FINALIZE] |
| | output: count:merge(*) |
| | group by: c_nationkey |
| | row-size=10B cardinality=25 |
| | |
| 11:EXCHANGE [HASH(c_nationkey)] |
| | |
| 05:AGGREGATE [STREAMING] |
| | output: count(*) |
| | group by: c_nationkey |
| | row-size=10B cardinality=50 |
| | |
| 10:AGGREGATE [FINALIZE] |
| | group by: c_nationkey, c_custkey |
| | row-size=10B cardinality=150.00K |
| | |
| 09:EXCHANGE [HASH(c_nationkey,c_custkey)] |
| | |
| 04:AGGREGATE [STREAMING] |
| | group by: c_nationkey, c_custkey |
| | row-size=10B cardinality=297.98K |
| | |
| 03:HASH JOIN [INNER JOIN, BROADCAST] |
| | hash predicates: o_custkey = c_custkey |
| | row-size=18B cardinality=1.50M |
| | |
| |--JOIN BUILD |
| | | join-table-id=00 plan-id=01 cohort-id=01 |
| | | build expressions: c_custkey |
| | | runtime filters: RF000 <- c_custkey |
| | | |
| | 08:EXCHANGE [BROADCAST] |
| | | |
| | 07:AGGREGATE [FINALIZE] |
| | | group by: c_nationkey, c_custkey |
| | | row-size=10B cardinality=150.00K |
| | | |
| | 06:EXCHANGE [HASH(c_nationkey,c_custkey)] |
| | | |
| | 01:AGGREGATE [STREAMING] |
| | | group by: c_nationkey, c_custkey |
| | | row-size=10B cardinality=150.00K |
| | | |
| | 00:SCAN HDFS [tpch.customer] |
| | HDFS partitions=1/1 files=1 size=23.08MB |
| | row-size=10B cardinality=150.00K |
| | |
| 02:SCAN HDFS [tpch.orders] |
| HDFS partitions=1/1 files=1 size=162.56MB |
| runtime filters: RF000 -> o_custkey |
| row-size=8B cardinality=1.50M |
| ==== |
| # Aggregation across nested views where the grouping columns do not change, |
| # and there is a UnionNode in the plan tree. |
| # The NDVs of the aggregation columns are as follows: |
| # c_nationkey = 25 |
| # c_custkey = 150000 |
| with v0 as ( |
| select * from tpch.customer |
| union all |
| select * from tpch.customer |
| ), |
| v1 as ( |
| select c_nationkey, c_custkey, count(*) |
| from v0 |
| group by c_nationkey, c_custkey), |
| v2 as ( |
| select c_nationkey, c_custkey, count(*) |
| from v1, tpch.orders |
| where c_custkey = o_custkey |
| group by c_nationkey, c_custkey) |
| select c_nationkey, count(*) from v2 group by c_nationkey; |
| ---- PLAN |
| PLAN-ROOT SINK |
| | |
| 07:AGGREGATE [FINALIZE] |
| | output: count(*) |
| | group by: c_nationkey |
| | row-size=10B cardinality=25 |
| | |
| 06:AGGREGATE [FINALIZE] |
| | group by: c_nationkey, c_custkey |
| | row-size=10B cardinality=300.00K |
| | |
| 05:HASH JOIN [INNER JOIN] |
| | hash predicates: o_custkey = c_custkey |
| | runtime filters: RF000 <- c_custkey |
| | row-size=18B cardinality=3.00M |
| | |
| |--03:AGGREGATE [FINALIZE] |
| | | group by: c_nationkey, c_custkey |
| | | row-size=10B cardinality=300.00K |
| | | |
| | 00:UNION |
| | | pass-through-operands: all |
| | | row-size=10B cardinality=300.00K |
| | | |
| | |--02:SCAN HDFS [tpch.customer] |
| | | HDFS partitions=1/1 files=1 size=23.08MB |
| | | row-size=10B cardinality=150.00K |
| | | |
| | 01:SCAN HDFS [tpch.customer] |
| | HDFS partitions=1/1 files=1 size=23.08MB |
| | row-size=10B cardinality=150.00K |
| | |
| 04:SCAN HDFS [tpch.orders] |
| HDFS partitions=1/1 files=1 size=162.56MB |
| runtime filters: RF000 -> o_custkey |
| row-size=8B cardinality=1.50M |
| ---- PARALLELPLANS |
| PLAN-ROOT SINK |
| | |
| 15:EXCHANGE [UNPARTITIONED] |
| | |
| 14:AGGREGATE [FINALIZE] |
| | output: count:merge(*) |
| | group by: c_nationkey |
| | row-size=10B cardinality=25 |
| | |
| 13:EXCHANGE [HASH(c_nationkey)] |
| | |
| 07:AGGREGATE [STREAMING] |
| | output: count(*) |
| | group by: c_nationkey |
| | row-size=10B cardinality=50 |
| | |
| 12:AGGREGATE [FINALIZE] |
| | group by: c_nationkey, c_custkey |
| | row-size=10B cardinality=300.00K |
| | |
| 11:EXCHANGE [HASH(c_nationkey,c_custkey)] |
| | |
| 06:AGGREGATE [STREAMING] |
| | group by: c_nationkey, c_custkey |
| | row-size=10B cardinality=595.96K |
| | |
| 05:HASH JOIN [INNER JOIN, BROADCAST] |
| | hash predicates: o_custkey = c_custkey |
| | row-size=18B cardinality=3.00M |
| | |
| |--JOIN BUILD |
| | | join-table-id=00 plan-id=01 cohort-id=01 |
| | | build expressions: c_custkey |
| | | runtime filters: RF000 <- c_custkey |
| | | |
| | 10:EXCHANGE [BROADCAST] |
| | | |
| | 09:AGGREGATE [FINALIZE] |
| | | group by: c_nationkey, c_custkey |
| | | row-size=10B cardinality=300.00K |
| | | |
| | 08:EXCHANGE [HASH(c_nationkey,c_custkey)] |
| | | |
| | 03:AGGREGATE [STREAMING] |
| | | group by: c_nationkey, c_custkey |
| | | row-size=10B cardinality=300.00K |
| | | |
| | 00:UNION |
| | | pass-through-operands: all |
| | | row-size=10B cardinality=300.00K |
| | | |
| | |--02:SCAN HDFS [tpch.customer] |
| | | HDFS partitions=1/1 files=1 size=23.08MB |
| | | row-size=10B cardinality=150.00K |
| | | |
| | 01:SCAN HDFS [tpch.customer] |
| | HDFS partitions=1/1 files=1 size=23.08MB |
| | row-size=10B cardinality=150.00K |
| | |
| 04:SCAN HDFS [tpch.orders] |
| HDFS partitions=1/1 files=1 size=162.56MB |
| runtime filters: RF000 -> o_custkey |
| row-size=8B cardinality=1.50M |
| ==== |
| # Aggregation across nested views where the grouping columns do not change, |
| # and there is a UnionNode with a limit in the plan tree. |
| # The NDVs of the aggregation columns are as follows: |
| # c_nationkey = 25 |
| # c_custkey = 150000 |
| with v0 as ( |
| select * from ( |
| select * from tpch.customer |
| union all |
| select * from tpch.customer |
| ) u limit 100 |
| ), |
| v1 as ( |
| select c_nationkey, c_custkey, count(*) |
| from v0 |
| group by c_nationkey, c_custkey), |
| v2 as ( |
| select c_nationkey, c_custkey, count(*) |
| from v1, tpch.orders |
| where c_custkey = o_custkey |
| group by c_nationkey, c_custkey) |
| select c_nationkey, count(*) from v2 group by c_nationkey; |
| ---- PLAN |
| PLAN-ROOT SINK |
| | |
| 07:AGGREGATE [FINALIZE] |
| | output: count(*) |
| | group by: c_nationkey |
| | row-size=10B cardinality=25 |
| | |
| 06:AGGREGATE [FINALIZE] |
| | group by: c_nationkey, c_custkey |
| | row-size=10B cardinality=100 |
| | |
| 05:HASH JOIN [INNER JOIN] |
| | hash predicates: o_custkey = c_custkey |
| | runtime filters: RF000 <- c_custkey |
| | row-size=18B cardinality=1.52K |
| | |
| |--03:AGGREGATE [FINALIZE] |
| | | group by: c_nationkey, c_custkey |
| | | row-size=10B cardinality=100 |
| | | |
| | 00:UNION |
| | | pass-through-operands: all |
| | | limit: 100 |
| | | row-size=10B cardinality=100 |
| | | |
| | |--02:SCAN HDFS [tpch.customer] |
| | | HDFS partitions=1/1 files=1 size=23.08MB |
| | | row-size=10B cardinality=150.00K |
| | | |
| | 01:SCAN HDFS [tpch.customer] |
| | HDFS partitions=1/1 files=1 size=23.08MB |
| | row-size=10B cardinality=150.00K |
| | |
| 04:SCAN HDFS [tpch.orders] |
| HDFS partitions=1/1 files=1 size=162.56MB |
| runtime filters: RF000 -> o_custkey |
| row-size=8B cardinality=1.50M |
| ---- PARALLELPLANS |
| PLAN-ROOT SINK |
| | |
| 14:EXCHANGE [UNPARTITIONED] |
| | |
| 13:AGGREGATE [FINALIZE] |
| | output: count:merge(*) |
| | group by: c_nationkey |
| | row-size=10B cardinality=25 |
| | |
| 12:EXCHANGE [HASH(c_nationkey)] |
| | |
| 07:AGGREGATE [STREAMING] |
| | output: count(*) |
| | group by: c_nationkey |
| | row-size=10B cardinality=44 |
| | |
| 11:AGGREGATE [FINALIZE] |
| | group by: c_nationkey, c_custkey |
| | row-size=10B cardinality=100 |
| | |
| 10:EXCHANGE [HASH(c_nationkey,c_custkey)] |
| | |
| 06:AGGREGATE [STREAMING] |
| | group by: c_nationkey, c_custkey |
| | row-size=10B cardinality=200 |
| | |
| 05:HASH JOIN [INNER JOIN, BROADCAST] |
| | hash predicates: o_custkey = c_custkey |
| | row-size=18B cardinality=1.52K |
| | |
| |--JOIN BUILD |
| | | join-table-id=00 plan-id=01 cohort-id=01 |
| | | build expressions: c_custkey |
| | | runtime filters: RF000 <- c_custkey |
| | | |
| | 09:EXCHANGE [BROADCAST] |
| | | |
| | 03:AGGREGATE [FINALIZE] |
| | | group by: c_nationkey, c_custkey |
| | | row-size=10B cardinality=100 |
| | | |
| | 08:EXCHANGE [UNPARTITIONED] |
| | | limit: 100 |
| | | |
| | 00:UNION |
| | | pass-through-operands: all |
| | | limit: 100 |
| | | row-size=10B cardinality=100 |
| | | |
| | |--02:SCAN HDFS [tpch.customer] |
| | | HDFS partitions=1/1 files=1 size=23.08MB |
| | | row-size=10B cardinality=150.00K |
| | | |
| | 01:SCAN HDFS [tpch.customer] |
| | HDFS partitions=1/1 files=1 size=23.08MB |
| | row-size=10B cardinality=150.00K |
| | |
| 04:SCAN HDFS [tpch.orders] |
| HDFS partitions=1/1 files=1 size=162.56MB |
| runtime filters: RF000 -> o_custkey |
| row-size=8B cardinality=1.50M |
| ==== |
| # Test grouping with equality stats predicate on grouping expression. |
| select ss_customer_sk, count(*) |
| from tpcds_parquet.store_sales |
| where ss_customer_sk = 1 |
| group by ss_customer_sk; |
| ---- PLAN |
| PLAN-ROOT SINK |
| | |
| 01:AGGREGATE [FINALIZE] |
| | output: count(*) |
| | group by: ss_customer_sk |
| | row-size=12B cardinality=1 |
| | |
| 00:SCAN HDFS [tpcds_parquet.store_sales] |
| HDFS partitions=1824/1824 files=1824 size=200.96MB |
| predicates: ss_customer_sk = 1 |
| row-size=4B cardinality=30 |
| ---- PARALLELPLANS |
| PLAN-ROOT SINK |
| | |
| 04:EXCHANGE [UNPARTITIONED] |
| | |
| 03:AGGREGATE [FINALIZE] |
| | output: count:merge(*) |
| | group by: ss_customer_sk |
| | row-size=12B cardinality=1 |
| | |
| 02:EXCHANGE [HASH(ss_customer_sk)] |
| | |
| 01:AGGREGATE [STREAMING] |
| | output: count(*) |
| | group by: ss_customer_sk |
| | row-size=12B cardinality=6 |
| | |
| 00:SCAN HDFS [tpcds_parquet.store_sales] |
| HDFS partitions=1824/1824 files=1824 size=200.96MB |
| predicates: ss_customer_sk = 1 |
| row-size=4B cardinality=30 |
| ==== |
| # Test grouping with in-list stats predicate on grouping expression. |
| select ss_customer_sk, count(*) |
| from tpcds_parquet.store_sales |
| where ss_customer_sk in (1, 2, 3, 4, 5) |
| group by ss_customer_sk; |
| ---- PLAN |
| PLAN-ROOT SINK |
| | |
| 01:AGGREGATE [FINALIZE] |
| | output: count(*) |
| | group by: ss_customer_sk |
| | row-size=12B cardinality=5 |
| | |
| 00:SCAN HDFS [tpcds_parquet.store_sales] |
| HDFS partitions=1824/1824 files=1824 size=200.96MB |
| predicates: ss_customer_sk IN (1, 2, 3, 4, 5) |
| row-size=4B cardinality=159 |
| ---- PARALLELPLANS |
| PLAN-ROOT SINK |
| | |
| 04:EXCHANGE [UNPARTITIONED] |
| | |
| 03:AGGREGATE [FINALIZE] |
| | output: count:merge(*) |
| | group by: ss_customer_sk |
| | row-size=12B cardinality=5 |
| | |
| 02:EXCHANGE [HASH(ss_customer_sk)] |
| | |
| 01:AGGREGATE [STREAMING] |
| | output: count(*) |
| | group by: ss_customer_sk |
| | row-size=12B cardinality=30 |
| | |
| 00:SCAN HDFS [tpcds_parquet.store_sales] |
| HDFS partitions=1824/1824 files=1824 size=200.96MB |
| predicates: ss_customer_sk IN (1, 2, 3, 4, 5) |
| row-size=4B cardinality=159 |
| ==== |
| # Test grouping with is-null stats predicate on grouping expression. |
| select ss_customer_sk, count(*) |
| from tpcds_orc_def.store_sales |
| where ss_customer_sk is null |
| group by ss_customer_sk; |
| ---- PLAN |
| PLAN-ROOT SINK |
| | |
| 01:AGGREGATE [FINALIZE] |
| | output: count(*) |
| | group by: ss_customer_sk |
| | row-size=12B cardinality=1 |
| | |
| 00:SCAN HDFS [tpcds_orc_def.store_sales] |
| HDFS partitions=1824/1824 files=1824 size=101.97MB |
| predicates: ss_customer_sk IS NULL |
| row-size=4B cardinality=unavailable |
| ---- PARALLELPLANS |
| PLAN-ROOT SINK |
| | |
| 04:EXCHANGE [UNPARTITIONED] |
| | |
| 03:AGGREGATE [FINALIZE] |
| | output: count:merge(*) |
| | group by: ss_customer_sk |
| | row-size=12B cardinality=1 |
| | |
| 02:EXCHANGE [HASH(ss_customer_sk)] |
| | |
| 01:AGGREGATE [STREAMING] |
| | output: count(*) |
| | group by: ss_customer_sk |
| | row-size=12B cardinality=1 |
| | |
| 00:SCAN HDFS [tpcds_orc_def.store_sales] |
| HDFS partitions=1824/1824 files=1824 size=101.97MB |
| predicates: ss_customer_sk IS NULL |
| row-size=4B cardinality=unavailable |
| ==== |