| # distinct * |
| select distinct * |
| from functional.testtbl |
| ---- PLAN |
| PLAN-ROOT SINK |
| | |
| 01:AGGREGATE [FINALIZE] |
| | group by: functional.testtbl.id, functional.testtbl.name, functional.testtbl.zip |
| | |
| 00:SCAN HDFS [functional.testtbl] |
| partitions=1/1 files=0 size=0B |
| ---- DISTRIBUTEDPLAN |
| PLAN-ROOT SINK |
| | |
| 04:EXCHANGE [UNPARTITIONED] |
| | |
| 03:AGGREGATE [FINALIZE] |
| | group by: functional.testtbl.id, functional.testtbl.name, functional.testtbl.zip |
| | |
| 02:EXCHANGE [HASH(functional.testtbl.id,functional.testtbl.name,functional.testtbl.zip)] |
| | |
| 01:AGGREGATE [STREAMING] |
| | group by: functional.testtbl.id, functional.testtbl.name, functional.testtbl.zip |
| | |
| 00:SCAN HDFS [functional.testtbl] |
| partitions=1/1 files=0 size=0B |
| ==== |
| # distinct w/ explicit select list |
| select distinct id, zip |
| from functional.testtbl |
| ---- PLAN |
| PLAN-ROOT SINK |
| | |
| 01:AGGREGATE [FINALIZE] |
| | group by: id, zip |
| | |
| 00:SCAN HDFS [functional.testtbl] |
| partitions=1/1 files=0 size=0B |
| ---- DISTRIBUTEDPLAN |
| PLAN-ROOT SINK |
| | |
| 04:EXCHANGE [UNPARTITIONED] |
| | |
| 03:AGGREGATE [FINALIZE] |
| | group by: id, zip |
| | |
| 02:EXCHANGE [HASH(id,zip)] |
| | |
| 01:AGGREGATE [STREAMING] |
| | group by: id, zip |
| | |
| 00:SCAN HDFS [functional.testtbl] |
| partitions=1/1 files=0 size=0B |
| ==== |
| # count(distinct) |
| select count(distinct id, zip) |
| from functional.testtbl |
| ---- PLAN |
| PLAN-ROOT SINK |
| | |
| 02:AGGREGATE [FINALIZE] |
| | output: count(if(id IS NULL, NULL, zip)) |
| | |
| 01:AGGREGATE |
| | group by: id, zip |
| | |
| 00:SCAN HDFS [functional.testtbl] |
| partitions=1/1 files=0 size=0B |
| ---- DISTRIBUTEDPLAN |
| PLAN-ROOT SINK |
| | |
| 06:AGGREGATE [FINALIZE] |
| | output: count:merge(if(id IS NULL, NULL, zip)) |
| | |
| 05:EXCHANGE [UNPARTITIONED] |
| | |
| 02:AGGREGATE |
| | output: count(if(id IS NULL, NULL, zip)) |
| | |
| 04:AGGREGATE |
| | group by: id, zip |
| | |
| 03:EXCHANGE [HASH(id,zip)] |
| | |
| 01:AGGREGATE [STREAMING] |
| | group by: id, zip |
| | |
| 00:SCAN HDFS [functional.testtbl] |
| partitions=1/1 files=0 size=0B |
| ==== |
| # count(distinct) w/ grouping |
| select tinyint_col, count(distinct int_col, bigint_col) |
| from functional.alltypesagg |
| group by 1 |
| ---- PLAN |
| PLAN-ROOT SINK |
| | |
| 02:AGGREGATE [FINALIZE] |
| | output: count(if(int_col IS NULL, NULL, bigint_col)) |
| | group by: tinyint_col |
| | |
| 01:AGGREGATE |
| | group by: tinyint_col, int_col, bigint_col |
| | |
| 00:SCAN HDFS [functional.alltypesagg] |
| partitions=11/11 files=11 size=814.73KB |
| ---- DISTRIBUTEDPLAN |
| PLAN-ROOT SINK |
| | |
| 07:EXCHANGE [UNPARTITIONED] |
| | |
| 06:AGGREGATE [FINALIZE] |
| | output: count:merge(if(int_col IS NULL, NULL, bigint_col)) |
| | group by: tinyint_col |
| | |
| 05:EXCHANGE [HASH(tinyint_col)] |
| | |
| 02:AGGREGATE [STREAMING] |
| | output: count(if(int_col IS NULL, NULL, bigint_col)) |
| | group by: tinyint_col |
| | |
| 04:AGGREGATE |
| | group by: tinyint_col, int_col, bigint_col |
| | |
| 03:EXCHANGE [HASH(tinyint_col,int_col,bigint_col)] |
| | |
| 01:AGGREGATE [STREAMING] |
| | group by: tinyint_col, int_col, bigint_col |
| | |
| 00:SCAN HDFS [functional.alltypesagg] |
| partitions=11/11 files=11 size=814.73KB |
| ==== |
| # count(distinct) and sum(distinct) w/ grouping |
| select tinyint_col, count(distinct int_col), sum(distinct int_col) |
| from functional.alltypesagg |
| group by 1 |
| ---- PLAN |
| PLAN-ROOT SINK |
| | |
| 02:AGGREGATE [FINALIZE] |
| | output: count(int_col), sum(int_col) |
| | group by: tinyint_col |
| | |
| 01:AGGREGATE |
| | group by: tinyint_col, int_col |
| | |
| 00:SCAN HDFS [functional.alltypesagg] |
| partitions=11/11 files=11 size=814.73KB |
| ---- DISTRIBUTEDPLAN |
| PLAN-ROOT SINK |
| | |
| 07:EXCHANGE [UNPARTITIONED] |
| | |
| 06:AGGREGATE [FINALIZE] |
| | output: count:merge(int_col), sum:merge(int_col) |
| | group by: tinyint_col |
| | |
| 05:EXCHANGE [HASH(tinyint_col)] |
| | |
| 02:AGGREGATE [STREAMING] |
| | output: count(int_col), sum(int_col) |
| | group by: tinyint_col |
| | |
| 04:AGGREGATE |
| | group by: tinyint_col, int_col |
| | |
| 03:EXCHANGE [HASH(tinyint_col,int_col)] |
| | |
| 01:AGGREGATE [STREAMING] |
| | group by: tinyint_col, int_col |
| | |
| 00:SCAN HDFS [functional.alltypesagg] |
| partitions=11/11 files=11 size=814.73KB |
| ==== |
| # sum(distinct) w/o grouping |
| select sum(distinct int_col) |
| from functional.alltypesagg |
| ---- PLAN |
| PLAN-ROOT SINK |
| | |
| 02:AGGREGATE [FINALIZE] |
| | output: sum(int_col) |
| | |
| 01:AGGREGATE |
| | group by: int_col |
| | |
| 00:SCAN HDFS [functional.alltypesagg] |
| partitions=11/11 files=11 size=814.73KB |
| ---- DISTRIBUTEDPLAN |
| PLAN-ROOT SINK |
| | |
| 06:AGGREGATE [FINALIZE] |
| | output: sum:merge(int_col) |
| | |
| 05:EXCHANGE [UNPARTITIONED] |
| | |
| 02:AGGREGATE |
| | output: sum(int_col) |
| | |
| 04:AGGREGATE |
| | group by: int_col |
| | |
| 03:EXCHANGE [HASH(int_col)] |
| | |
| 01:AGGREGATE [STREAMING] |
| | group by: int_col |
| | |
| 00:SCAN HDFS [functional.alltypesagg] |
| partitions=11/11 files=11 size=814.73KB |
| ==== |
| # count(distinct) and sum(distinct) w/ grouping; distinct in min() and max() |
| # is ignored |
| select tinyint_col, count(distinct int_col), |
| min(distinct smallint_col), max(distinct string_col) |
| from functional.alltypesagg group by 1 |
| ---- PLAN |
| PLAN-ROOT SINK |
| | |
| 02:AGGREGATE [FINALIZE] |
| | output: count(int_col), min:merge(smallint_col), max:merge(string_col) |
| | group by: tinyint_col |
| | |
| 01:AGGREGATE |
| | output: min(smallint_col), max(string_col) |
| | group by: tinyint_col, int_col |
| | |
| 00:SCAN HDFS [functional.alltypesagg] |
| partitions=11/11 files=11 size=814.73KB |
| ---- DISTRIBUTEDPLAN |
| PLAN-ROOT SINK |
| | |
| 07:EXCHANGE [UNPARTITIONED] |
| | |
| 06:AGGREGATE [FINALIZE] |
| | output: count:merge(int_col), min:merge(smallint_col), max:merge(string_col) |
| | group by: tinyint_col |
| | |
| 05:EXCHANGE [HASH(tinyint_col)] |
| | |
| 02:AGGREGATE [STREAMING] |
| | output: count(int_col), min:merge(smallint_col), max:merge(string_col) |
| | group by: tinyint_col |
| | |
| 04:AGGREGATE |
| | output: min:merge(smallint_col), max:merge(string_col) |
| | group by: tinyint_col, int_col |
| | |
| 03:EXCHANGE [HASH(tinyint_col,int_col)] |
| | |
| 01:AGGREGATE [STREAMING] |
| | output: min(smallint_col), max(string_col) |
| | group by: tinyint_col, int_col |
| | |
| 00:SCAN HDFS [functional.alltypesagg] |
| partitions=11/11 files=11 size=814.73KB |
| ==== |
| # aggregate fns with and without distinct |
| select tinyint_col, count(distinct int_col), count(*), sum(distinct int_col), |
| sum(int_col), min(smallint_col), max(bigint_col) |
| from functional.alltypesagg group by 1 |
| ---- PLAN |
| PLAN-ROOT SINK |
| | |
| 02:AGGREGATE [FINALIZE] |
| | output: count(int_col), sum(int_col), count:merge(*), sum:merge(int_col), min:merge(smallint_col), max:merge(bigint_col) |
| | group by: tinyint_col |
| | |
| 01:AGGREGATE |
| | output: count(*), sum(int_col), min(smallint_col), max(bigint_col) |
| | group by: tinyint_col, int_col |
| | |
| 00:SCAN HDFS [functional.alltypesagg] |
| partitions=11/11 files=11 size=814.73KB |
| ---- DISTRIBUTEDPLAN |
| PLAN-ROOT SINK |
| | |
| 07:EXCHANGE [UNPARTITIONED] |
| | |
| 06:AGGREGATE [FINALIZE] |
| | output: count:merge(int_col), sum:merge(int_col), count:merge(*), sum:merge(int_col), min:merge(smallint_col), max:merge(bigint_col) |
| | group by: tinyint_col |
| | |
| 05:EXCHANGE [HASH(tinyint_col)] |
| | |
| 02:AGGREGATE [STREAMING] |
| | output: count(int_col), sum(int_col), count:merge(*), sum:merge(int_col), min:merge(smallint_col), max:merge(bigint_col) |
| | group by: tinyint_col |
| | |
| 04:AGGREGATE |
| | output: count:merge(*), sum:merge(int_col), min:merge(smallint_col), max:merge(bigint_col) |
| | group by: tinyint_col, int_col |
| | |
| 03:EXCHANGE [HASH(tinyint_col,int_col)] |
| | |
| 01:AGGREGATE [STREAMING] |
| | output: count(*), sum(int_col), min(smallint_col), max(bigint_col) |
| | group by: tinyint_col, int_col |
| | |
| 00:SCAN HDFS [functional.alltypesagg] |
| partitions=11/11 files=11 size=814.73KB |
| ==== |
| # test join on inline views containing distinct aggregates to make sure |
| # the aggregation info reports the correct tuple ids (from the 2nd phase |
| # distinct aggregation) for the inline-view expression substitution |
| select t1.c, t2.c from |
| (select count(distinct int_col) as c from functional.alltypestiny) t1 inner join |
| (select count(distinct bigint_col) as c from functional.alltypestiny) t2 on (t1.c = t2.c) |
| ---- PLAN |
| PLAN-ROOT SINK |
| | |
| 06:HASH JOIN [INNER JOIN] |
| | hash predicates: count(int_col) = count(bigint_col) |
| | |
| |--05:AGGREGATE [FINALIZE] |
| | | output: count(bigint_col) |
| | | |
| | 04:AGGREGATE |
| | | group by: bigint_col |
| | | |
| | 03:SCAN HDFS [functional.alltypestiny] |
| | partitions=4/4 files=4 size=460B |
| | |
| 02:AGGREGATE [FINALIZE] |
| | output: count(int_col) |
| | |
| 01:AGGREGATE |
| | group by: int_col |
| | |
| 00:SCAN HDFS [functional.alltypestiny] |
| partitions=4/4 files=4 size=460B |
| ---- DISTRIBUTEDPLAN |
| PLAN-ROOT SINK |
| | |
| 06:HASH JOIN [INNER JOIN, BROADCAST] |
| | hash predicates: count(int_col) = count(bigint_col) |
| | |
| |--15:EXCHANGE [UNPARTITIONED] |
| | | |
| | 14:AGGREGATE [FINALIZE] |
| | | output: count:merge(bigint_col) |
| | | |
| | 13:EXCHANGE [UNPARTITIONED] |
| | | |
| | 05:AGGREGATE |
| | | output: count(bigint_col) |
| | | |
| | 12:AGGREGATE |
| | | group by: bigint_col |
| | | |
| | 11:EXCHANGE [HASH(bigint_col)] |
| | | |
| | 04:AGGREGATE [STREAMING] |
| | | group by: bigint_col |
| | | |
| | 03:SCAN HDFS [functional.alltypestiny] |
| | partitions=4/4 files=4 size=460B |
| | |
| 10:AGGREGATE [FINALIZE] |
| | output: count:merge(int_col) |
| | |
| 09:EXCHANGE [UNPARTITIONED] |
| | |
| 02:AGGREGATE |
| | output: count(int_col) |
| | |
| 08:AGGREGATE |
| | group by: int_col |
| | |
| 07:EXCHANGE [HASH(int_col)] |
| | |
| 01:AGGREGATE [STREAMING] |
| | group by: int_col |
| | |
| 00:SCAN HDFS [functional.alltypestiny] |
| partitions=4/4 files=4 size=460B |
| ==== |
| # Test placement of having predicate into 2nd phase merge agg for |
| # distinct + non-distinct aggregates without group by (IMPALA-845). |
| # TODO: Fix the incorrect labels for non-distinct agg expr after the |
| # 1st phase merge. We'd need to create more smaps during analysis |
| # because there are more than two levels of merging for the |
| # non-distinct agg expr. |
| select count(distinct tinyint_col) from functional.alltypes |
| having count(bigint_col) > 0 |
| ---- PLAN |
| PLAN-ROOT SINK |
| | |
| 02:AGGREGATE [FINALIZE] |
| | output: count(tinyint_col), count:merge(bigint_col) |
| | having: zeroifnull(count(bigint_col)) > 0 |
| | |
| 01:AGGREGATE |
| | output: count(bigint_col) |
| | group by: tinyint_col |
| | |
| 00:SCAN HDFS [functional.alltypes] |
| partitions=24/24 files=24 size=478.45KB |
| ---- DISTRIBUTEDPLAN |
| PLAN-ROOT SINK |
| | |
| 06:AGGREGATE [FINALIZE] |
| | output: count:merge(tinyint_col), count:merge(bigint_col) |
| | having: zeroifnull(count(bigint_col)) > 0 |
| | |
| 05:EXCHANGE [UNPARTITIONED] |
| | |
| 02:AGGREGATE |
| | output: count(tinyint_col), count:merge(bigint_col) |
| | |
| 04:AGGREGATE |
| | output: count:merge(bigint_col) |
| | group by: tinyint_col |
| | |
| 03:EXCHANGE [HASH(tinyint_col)] |
| | |
| 01:AGGREGATE [STREAMING] |
| | output: count(bigint_col) |
| | group by: tinyint_col |
| | |
| 00:SCAN HDFS [functional.alltypes] |
| partitions=24/24 files=24 size=478.45KB |
| ==== |
| # test slot materialization on a distinct agg inside an inline view |
| # triggered by a predicate in an outer query block (IMPALA-861) |
| select 1 from |
| (select count(distinct 1) x from functional.alltypes) t |
| where t.x is not null |
| ---- PLAN |
| PLAN-ROOT SINK |
| | |
| 02:AGGREGATE [FINALIZE] |
| | output: count(1) |
| | having: count(1) IS NOT NULL |
| | |
| 01:AGGREGATE |
| | group by: 1 |
| | |
| 00:SCAN HDFS [functional.alltypes] |
| partitions=24/24 files=24 size=478.45KB |
| ---- DISTRIBUTEDPLAN |
| PLAN-ROOT SINK |
| | |
| 06:AGGREGATE [FINALIZE] |
| | output: count:merge(1) |
| | having: count(1) IS NOT NULL |
| | |
| 05:EXCHANGE [UNPARTITIONED] |
| | |
| 02:AGGREGATE |
| | output: count(1) |
| | |
| 04:AGGREGATE |
| | group by: 1 |
| | |
| 03:EXCHANGE [HASH(1)] |
| | |
| 01:AGGREGATE [STREAMING] |
| | group by: 1 |
| | |
| 00:SCAN HDFS [functional.alltypes] |
| partitions=24/24 files=24 size=478.45KB |
| ==== |
| # test slot materialization on a distinct agg inside an inline view |
| # triggered by a predicate in an outer query block (IMPALA-861) |
| select 1 from |
| (select count(distinct 1) x, count(1) y from functional.alltypes) t |
| where t.x + t.y > 10 and t.x > 0 and t.y > 1 |
| ---- PLAN |
| PLAN-ROOT SINK |
| | |
| 02:AGGREGATE [FINALIZE] |
| | output: count(1), count:merge(*) |
| | having: count(1) > 0, zeroifnull(count(*)) > 1, count(1) + zeroifnull(count(*)) > 10 |
| | |
| 01:AGGREGATE |
| | output: count(*) |
| | group by: 1 |
| | |
| 00:SCAN HDFS [functional.alltypes] |
| partitions=24/24 files=24 size=478.45KB |
| ---- DISTRIBUTEDPLAN |
| PLAN-ROOT SINK |
| | |
| 06:AGGREGATE [FINALIZE] |
| | output: count:merge(1), count:merge(*) |
| | having: count(1) > 0, zeroifnull(count(*)) > 1, count(1) + zeroifnull(count(*)) > 10 |
| | |
| 05:EXCHANGE [UNPARTITIONED] |
| | |
| 02:AGGREGATE |
| | output: count(1), count:merge(*) |
| | |
| 04:AGGREGATE |
| | output: count:merge(*) |
| | group by: 1 |
| | |
| 03:EXCHANGE [HASH(1)] |
| | |
| 01:AGGREGATE [STREAMING] |
| | output: count(*) |
| | group by: 1 |
| | |
| 00:SCAN HDFS [functional.alltypes] |
| partitions=24/24 files=24 size=478.45KB |
| ==== |
| # IMPALA-2266: Test non-grouping distinct aggregation inside an inline view. |
| select * from (select count(distinct int_col) cd from functional.alltypes) v |
| ---- DISTRIBUTEDPLAN |
| PLAN-ROOT SINK |
| | |
| 06:AGGREGATE [FINALIZE] |
| | output: count:merge(int_col) |
| | |
| 05:EXCHANGE [UNPARTITIONED] |
| | |
| 02:AGGREGATE |
| | output: count(int_col) |
| | |
| 04:AGGREGATE |
| | group by: int_col |
| | |
| 03:EXCHANGE [HASH(int_col)] |
| | |
| 01:AGGREGATE [STREAMING] |
| | group by: int_col |
| | |
| 00:SCAN HDFS [functional.alltypes] |
| partitions=24/24 files=24 size=478.45KB |
| ==== |
| # IMPALA-2266: Test grouping distinct aggregation inside an inline view. |
| select * from (select count(distinct int_col) cd from functional.alltypes group by bool_col) v |
| ---- DISTRIBUTEDPLAN |
| PLAN-ROOT SINK |
| | |
| 07:EXCHANGE [UNPARTITIONED] |
| | |
| 06:AGGREGATE [FINALIZE] |
| | output: count:merge(int_col) |
| | group by: bool_col |
| | |
| 05:EXCHANGE [HASH(bool_col)] |
| | |
| 02:AGGREGATE [STREAMING] |
| | output: count(int_col) |
| | group by: bool_col |
| | |
| 04:AGGREGATE |
| | group by: bool_col, int_col |
| | |
| 03:EXCHANGE [HASH(bool_col,int_col)] |
| | |
| 01:AGGREGATE [STREAMING] |
| | group by: bool_col, int_col |
| | |
| 00:SCAN HDFS [functional.alltypes] |
| partitions=24/24 files=24 size=478.45KB |
| ==== |
| # IMPALA-4042: count(distinct NULL) fails on a view |
| select count(distinct null) from functional.alltypes_view |
| ---- DISTRIBUTEDPLAN |
| PLAN-ROOT SINK |
| | |
| 06:AGGREGATE [FINALIZE] |
| | output: count:merge(NULL) |
| | |
| 05:EXCHANGE [UNPARTITIONED] |
| | |
| 02:AGGREGATE |
| | output: count(NULL) |
| | |
| 04:AGGREGATE |
| | group by: NULL |
| | |
| 03:EXCHANGE [HASH(NULL)] |
| | |
| 01:AGGREGATE [STREAMING] |
| | group by: NULL |
| | |
| 00:SCAN HDFS [functional.alltypes] |
| partitions=24/24 files=24 size=478.45KB |
| ==== |