# basic aggregation
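# The distributed plan below shows the standard two-phase pattern for a non-grouping
# aggregation: each node pre-aggregates its scanned rows (01:AGGREGATE), the partial
# results flow through an unpartitioned exchange, and a single merge step
# (03:AGGREGATE, using the *:merge variants of each function) produces the final row.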
select count(*), count(tinyint_col), min(tinyint_col), max(tinyint_col), sum(tinyint_col),
avg(tinyint_col)
from functional.alltypesagg
---- PLAN
PLAN-ROOT SINK
|
01:AGGREGATE [FINALIZE]
| output: count(*), count(tinyint_col), min(tinyint_col), max(tinyint_col), sum(tinyint_col), avg(tinyint_col)
| row-size=34B cardinality=1
|
00:SCAN HDFS [functional.alltypesagg]
HDFS partitions=11/11 files=11 size=814.73KB
row-size=1B cardinality=11.00K
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
03:AGGREGATE [FINALIZE]
| output: count:merge(*), count:merge(tinyint_col), min:merge(tinyint_col), max:merge(tinyint_col), sum:merge(tinyint_col), avg:merge(tinyint_col)
| row-size=34B cardinality=1
|
02:EXCHANGE [UNPARTITIONED]
|
01:AGGREGATE
| output: count(*), count(tinyint_col), min(tinyint_col), max(tinyint_col), sum(tinyint_col), avg(tinyint_col)
| row-size=34B cardinality=3
|
00:SCAN HDFS [functional.alltypesagg]
HDFS partitions=11/11 files=11 size=814.73KB
row-size=1B cardinality=11.00K
====
# with grouping
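# The ordinals in "group by 2, 1" resolve to bigint_col and tinyint_col. In the
# distributed plan, the streaming pre-aggregation is followed by a repartition on
# HASH(bigint_col,tinyint_col) so each group is finalized on exactly one node.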
select tinyint_col, bigint_col, count(*), min(tinyint_col), max(tinyint_col), sum(tinyint_col),
avg(tinyint_col)
from functional.alltypesagg
group by 2, 1
---- PLAN
PLAN-ROOT SINK
|
01:AGGREGATE [FINALIZE]
| output: count(*), min(tinyint_col), max(tinyint_col), sum(tinyint_col), avg(tinyint_col)
| group by: bigint_col, tinyint_col
| row-size=35B cardinality=9.07K
|
00:SCAN HDFS [functional.alltypesagg]
HDFS partitions=11/11 files=11 size=814.73KB
row-size=9B cardinality=11.00K
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
04:EXCHANGE [UNPARTITIONED]
|
03:AGGREGATE [FINALIZE]
| output: count:merge(*), min:merge(tinyint_col), max:merge(tinyint_col), sum:merge(tinyint_col), avg:merge(tinyint_col)
| group by: bigint_col, tinyint_col
| row-size=35B cardinality=9.05K
|
02:EXCHANGE [HASH(bigint_col,tinyint_col)]
|
01:AGGREGATE [STREAMING]
| output: count(*), min(tinyint_col), max(tinyint_col), sum(tinyint_col), avg(tinyint_col)
| group by: bigint_col, tinyint_col
| row-size=35B cardinality=9.05K
|
00:SCAN HDFS [functional.alltypesagg]
HDFS partitions=11/11 files=11 size=814.73KB
row-size=9B cardinality=11.00K
====
# avg substitution
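# Although only avg(id) is selected, count(id) from the HAVING clause and avg(zip)
# from the ORDER BY are also materialized as outputs of the aggregation node.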
select avg(id)
from functional.testtbl
having count(id) > 0
order by avg(zip) limit 10
---- PLAN
PLAN-ROOT SINK
|
02:TOP-N [LIMIT=10]
| order by: avg(zip) ASC
| row-size=16B cardinality=1
|
01:AGGREGATE [FINALIZE]
| output: avg(id), count(id), avg(zip)
| having: count(id) > 0
| row-size=24B cardinality=1
|
00:SCAN HDFS [functional.testtbl]
HDFS partitions=1/1 files=0 size=0B
row-size=12B cardinality=0
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
02:TOP-N [LIMIT=10]
| order by: avg(zip) ASC
| row-size=16B cardinality=1
|
04:AGGREGATE [FINALIZE]
| output: avg:merge(id), count:merge(id), avg:merge(zip)
| having: count(id) > 0
| row-size=24B cardinality=1
|
03:EXCHANGE [UNPARTITIONED]
|
01:AGGREGATE
| output: avg(id), count(id), avg(zip)
| row-size=24B cardinality=0
|
00:SCAN HDFS [functional.testtbl]
HDFS partitions=1/1 files=0 size=0B
row-size=12B cardinality=0
====
# Test correct removal of redundant group-by expressions (IMPALA-817)
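# "int_col + int_col" appears twice in the GROUP BY list; the planner should collapse
# the duplicates so the aggregation groups on only two exprs.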
select int_col + int_col, int_col * int_col
from functional.alltypesagg
group by int_col + int_col, int_col * int_col, int_col + int_col
having (int_col * int_col) < 0 limit 10
---- PLAN
PLAN-ROOT SINK
|
01:AGGREGATE [FINALIZE]
| group by: int_col + int_col, int_col * int_col
| having: int_col * int_col < 0
| limit: 10
| row-size=16B cardinality=10
|
00:SCAN HDFS [functional.alltypesagg]
HDFS partitions=11/11 files=11 size=814.73KB
row-size=4B cardinality=11.00K
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
04:EXCHANGE [UNPARTITIONED]
| limit: 10
|
03:AGGREGATE [FINALIZE]
| group by: int_col + int_col, int_col * int_col
| having: int_col * int_col < 0
| limit: 10
| row-size=16B cardinality=10
|
02:EXCHANGE [HASH(int_col + int_col,int_col * int_col)]
|
01:AGGREGATE [STREAMING]
| group by: int_col + int_col, int_col * int_col
| row-size=16B cardinality=11.00K
|
00:SCAN HDFS [functional.alltypesagg]
HDFS partitions=11/11 files=11 size=814.73KB
row-size=4B cardinality=11.00K
====
# Tests that a having predicate triggers slot materialization (IMPALA-846).
select count(*) from
functional.alltypes t1 inner join functional.alltypestiny t2
on t1.smallint_col = t2.smallint_col
group by t1.tinyint_col, t2.smallint_col
having count(t2.int_col) = count(t1.bigint_col)
---- PLAN
PLAN-ROOT SINK
|
03:AGGREGATE [FINALIZE]
| output: count(*), count(t2.int_col), count(t1.bigint_col)
| group by: t1.tinyint_col, t2.smallint_col
| having: count(t2.int_col) = count(t1.bigint_col)
| row-size=27B cardinality=2
|
02:HASH JOIN [INNER JOIN]
| hash predicates: t1.smallint_col = t2.smallint_col
| runtime filters: RF000 <- t2.smallint_col
| row-size=17B cardinality=5.84K
|
|--01:SCAN HDFS [functional.alltypestiny t2]
| HDFS partitions=4/4 files=4 size=460B
| row-size=6B cardinality=8
|
00:SCAN HDFS [functional.alltypes t1]
HDFS partitions=24/24 files=24 size=478.45KB
runtime filters: RF000 -> t1.smallint_col
row-size=11B cardinality=7.30K
====
# Tests proper slot materialization of agg-tuple slots for avg (IMP-1271).
# 't.x > 10' is picked up as an unassigned conjunct, and not as a binding
# predicate because avg gets rewritten into an expr against two slots
# (and getBoundPredicates() cannot handle multi-slot predicates).
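# As a result, t.x > 10 is applied as "having: avg(bigint_col) > 10" on the
# aggregation node rather than being pushed into the scan.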
select 1 from
(select int_col, avg(bigint_col) x from functional.alltypes
group by int_col) t
where t.x > 10
---- PLAN
PLAN-ROOT SINK
|
01:AGGREGATE [FINALIZE]
| output: avg(bigint_col)
| group by: int_col
| having: avg(bigint_col) > 10
| row-size=12B cardinality=1
|
00:SCAN HDFS [functional.alltypes]
HDFS partitions=24/24 files=24 size=478.45KB
row-size=12B cardinality=7.30K
====
# test distributed aggregation over unions (IMPALA-831)
# non-distinct agg without grouping over a union
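# The count(*) sits directly above the union; since no columns are needed the union
# operands pass through with row-size=0B, and the distributed plan wraps the usual
# pre-aggregation/merge pair around the unpartitioned exchange.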
select count(*) from
(select * from functional.alltypes
union all
select * from functional.alltypessmall) t
limit 10
---- PLAN
PLAN-ROOT SINK
|
03:AGGREGATE [FINALIZE]
| output: count(*)
| limit: 10
| row-size=8B cardinality=1
|
00:UNION
| pass-through-operands: all
| row-size=0B cardinality=7.40K
|
|--02:SCAN HDFS [functional.alltypessmall]
| HDFS partitions=4/4 files=4 size=6.32KB
| row-size=0B cardinality=100
|
01:SCAN HDFS [functional.alltypes]
HDFS partitions=24/24 files=24 size=478.45KB
row-size=0B cardinality=7.30K
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
05:AGGREGATE [FINALIZE]
| output: count:merge(*)
| limit: 10
| row-size=8B cardinality=1
|
04:EXCHANGE [UNPARTITIONED]
|
03:AGGREGATE
| output: count(*)
| row-size=8B cardinality=3
|
00:UNION
| pass-through-operands: all
| row-size=0B cardinality=7.40K
|
|--02:SCAN HDFS [functional.alltypessmall]
| HDFS partitions=4/4 files=4 size=6.32KB
| row-size=0B cardinality=100
|
01:SCAN HDFS [functional.alltypes]
HDFS partitions=24/24 files=24 size=478.45KB
row-size=0B cardinality=7.30K
====
# non-distinct agg with grouping over a union
select count(*) from
(select * from functional.alltypes
union all
select * from functional.alltypessmall) t
group by t.bigint_col
limit 10
---- PLAN
PLAN-ROOT SINK
|
03:AGGREGATE [FINALIZE]
| output: count(*)
| group by: bigint_col
| limit: 10
| row-size=16B cardinality=10
|
00:UNION
| pass-through-operands: all
| row-size=8B cardinality=7.40K
|
|--02:SCAN HDFS [functional.alltypessmall]
| HDFS partitions=4/4 files=4 size=6.32KB
| row-size=8B cardinality=100
|
01:SCAN HDFS [functional.alltypes]
HDFS partitions=24/24 files=24 size=478.45KB
row-size=8B cardinality=7.30K
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
06:EXCHANGE [UNPARTITIONED]
| limit: 10
|
05:AGGREGATE [FINALIZE]
| output: count:merge(*)
| group by: t.bigint_col
| limit: 10
| row-size=16B cardinality=10
|
04:EXCHANGE [HASH(t.bigint_col)]
|
03:AGGREGATE [STREAMING]
| output: count(*)
| group by: bigint_col
| row-size=16B cardinality=60
|
00:UNION
| pass-through-operands: all
| row-size=8B cardinality=7.40K
|
|--02:SCAN HDFS [functional.alltypessmall]
| HDFS partitions=4/4 files=4 size=6.32KB
| row-size=8B cardinality=100
|
01:SCAN HDFS [functional.alltypes]
HDFS partitions=24/24 files=24 size=478.45KB
row-size=8B cardinality=7.30K
====
# distinct agg without grouping over a union
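# The distinct aggregate is rewritten into two phases: the first aggregation groups
# by int_col to deduplicate, and the second counts the surviving groups. The
# distributed plan repartitions on HASH(int_col) between the streaming and merge
# steps of the first phase.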
select count(distinct int_col)
from
(select * from functional.alltypes
union all
select * from functional.alltypessmall) t
limit 10
---- PLAN
PLAN-ROOT SINK
|
04:AGGREGATE [FINALIZE]
| output: count(int_col)
| limit: 10
| row-size=8B cardinality=1
|
03:AGGREGATE
| group by: int_col
| row-size=4B cardinality=20
|
00:UNION
| pass-through-operands: all
| row-size=4B cardinality=7.40K
|
|--02:SCAN HDFS [functional.alltypessmall]
| HDFS partitions=4/4 files=4 size=6.32KB
| row-size=4B cardinality=100
|
01:SCAN HDFS [functional.alltypes]
HDFS partitions=24/24 files=24 size=478.45KB
row-size=4B cardinality=7.30K
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
08:AGGREGATE [FINALIZE]
| output: count:merge(int_col)
| limit: 10
| row-size=8B cardinality=1
|
07:EXCHANGE [UNPARTITIONED]
|
04:AGGREGATE
| output: count(int_col)
| row-size=8B cardinality=1
|
06:AGGREGATE
| group by: int_col
| row-size=4B cardinality=20
|
05:EXCHANGE [HASH(int_col)]
|
03:AGGREGATE [STREAMING]
| group by: int_col
| row-size=4B cardinality=60
|
00:UNION
| pass-through-operands: all
| row-size=4B cardinality=7.40K
|
|--02:SCAN HDFS [functional.alltypessmall]
| HDFS partitions=4/4 files=4 size=6.32KB
| row-size=4B cardinality=100
|
01:SCAN HDFS [functional.alltypes]
HDFS partitions=24/24 files=24 size=478.45KB
row-size=4B cardinality=7.30K
====
# distinct agg with grouping over a union
select count(distinct int_col)
from
(select * from functional.alltypes
union all
select * from functional.alltypessmall) t
group by t.bigint_col
limit 10
---- PLAN
PLAN-ROOT SINK
|
04:AGGREGATE [FINALIZE]
| output: count(int_col)
| group by: t.bigint_col
| limit: 10
| row-size=16B cardinality=10
|
03:AGGREGATE
| group by: bigint_col, int_col
| row-size=12B cardinality=400
|
00:UNION
| pass-through-operands: all
| row-size=12B cardinality=7.40K
|
|--02:SCAN HDFS [functional.alltypessmall]
| HDFS partitions=4/4 files=4 size=6.32KB
| row-size=12B cardinality=100
|
01:SCAN HDFS [functional.alltypes]
HDFS partitions=24/24 files=24 size=478.45KB
row-size=12B cardinality=7.30K
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
09:EXCHANGE [UNPARTITIONED]
| limit: 10
|
08:AGGREGATE [FINALIZE]
| output: count:merge(int_col)
| group by: t.bigint_col
| limit: 10
| row-size=16B cardinality=10
|
07:EXCHANGE [HASH(t.bigint_col)]
|
04:AGGREGATE [STREAMING]
| output: count(int_col)
| group by: t.bigint_col
| row-size=16B cardinality=60
|
06:AGGREGATE
| group by: t.bigint_col, int_col
| row-size=12B cardinality=400
|
05:EXCHANGE [HASH(t.bigint_col,int_col)]
|
03:AGGREGATE [STREAMING]
| group by: bigint_col, int_col
| row-size=12B cardinality=1.20K
|
00:UNION
| pass-through-operands: all
| row-size=12B cardinality=7.40K
|
|--02:SCAN HDFS [functional.alltypessmall]
| HDFS partitions=4/4 files=4 size=6.32KB
| row-size=12B cardinality=100
|
01:SCAN HDFS [functional.alltypes]
HDFS partitions=24/24 files=24 size=478.45KB
row-size=12B cardinality=7.30K
====
# mixed distinct and non-distinct agg without grouping over a union
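# The non-distinct count(smallint_col) is computed in the first phase alongside the
# deduplication on int_col, then combined via count:merge(smallint_col) in the second
# phase while count(int_col) tallies the distinct groups.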
select count(smallint_col), count(distinct int_col)
from
(select * from functional.alltypes
union all
select * from functional.alltypessmall) t
limit 10
---- PLAN
PLAN-ROOT SINK
|
04:AGGREGATE [FINALIZE]
| output: count(int_col), count:merge(smallint_col)
| limit: 10
| row-size=16B cardinality=1
|
03:AGGREGATE
| output: count(smallint_col)
| group by: int_col
| row-size=12B cardinality=20
|
00:UNION
| pass-through-operands: all
| row-size=6B cardinality=7.40K
|
|--02:SCAN HDFS [functional.alltypessmall]
| HDFS partitions=4/4 files=4 size=6.32KB
| row-size=6B cardinality=100
|
01:SCAN HDFS [functional.alltypes]
HDFS partitions=24/24 files=24 size=478.45KB
row-size=6B cardinality=7.30K
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
08:AGGREGATE [FINALIZE]
| output: count:merge(int_col), count:merge(smallint_col)
| limit: 10
| row-size=16B cardinality=1
|
07:EXCHANGE [UNPARTITIONED]
|
04:AGGREGATE
| output: count(int_col), count:merge(smallint_col)
| row-size=16B cardinality=1
|
06:AGGREGATE
| output: count:merge(smallint_col)
| group by: int_col
| row-size=12B cardinality=20
|
05:EXCHANGE [HASH(int_col)]
|
03:AGGREGATE [STREAMING]
| output: count(smallint_col)
| group by: int_col
| row-size=12B cardinality=60
|
00:UNION
| pass-through-operands: all
| row-size=6B cardinality=7.40K
|
|--02:SCAN HDFS [functional.alltypessmall]
| HDFS partitions=4/4 files=4 size=6.32KB
| row-size=6B cardinality=100
|
01:SCAN HDFS [functional.alltypes]
HDFS partitions=24/24 files=24 size=478.45KB
row-size=6B cardinality=7.30K
====
# mixed distinct and non-distinct agg with grouping over a union
select count(smallint_col), count(distinct int_col)
from
(select * from functional.alltypes
union all
select * from functional.alltypessmall) t
group by t.bigint_col
limit 10
---- PLAN
PLAN-ROOT SINK
|
04:AGGREGATE [FINALIZE]
| output: count(int_col), count:merge(smallint_col)
| group by: t.bigint_col
| limit: 10
| row-size=24B cardinality=10
|
03:AGGREGATE
| output: count(smallint_col)
| group by: bigint_col, int_col
| row-size=20B cardinality=400
|
00:UNION
| pass-through-operands: all
| row-size=14B cardinality=7.40K
|
|--02:SCAN HDFS [functional.alltypessmall]
| HDFS partitions=4/4 files=4 size=6.32KB
| row-size=14B cardinality=100
|
01:SCAN HDFS [functional.alltypes]
HDFS partitions=24/24 files=24 size=478.45KB
row-size=14B cardinality=7.30K
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
09:EXCHANGE [UNPARTITIONED]
| limit: 10
|
08:AGGREGATE [FINALIZE]
| output: count:merge(int_col), count:merge(smallint_col)
| group by: t.bigint_col
| limit: 10
| row-size=24B cardinality=10
|
07:EXCHANGE [HASH(t.bigint_col)]
|
04:AGGREGATE [STREAMING]
| output: count(int_col), count:merge(smallint_col)
| group by: t.bigint_col
| row-size=24B cardinality=60
|
06:AGGREGATE
| output: count:merge(smallint_col)
| group by: t.bigint_col, int_col
| row-size=20B cardinality=400
|
05:EXCHANGE [HASH(t.bigint_col,int_col)]
|
03:AGGREGATE [STREAMING]
| output: count(smallint_col)
| group by: bigint_col, int_col
| row-size=20B cardinality=1.20K
|
00:UNION
| pass-through-operands: all
| row-size=14B cardinality=7.40K
|
|--02:SCAN HDFS [functional.alltypessmall]
| HDFS partitions=4/4 files=4 size=6.32KB
| row-size=14B cardinality=100
|
01:SCAN HDFS [functional.alltypes]
HDFS partitions=24/24 files=24 size=478.45KB
row-size=14B cardinality=7.30K
====
# mixed distinct and non-distinct agg with grouping over a union distinct
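# UNION DISTINCT introduces its own grouping aggregation over all union columns
# (03:AGGREGATE below) before the two-phase distinct rewrite is applied on top of it.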
select count(smallint_col), count(distinct int_col)
from
(select * from functional.alltypes
union distinct
select * from functional.alltypessmall) t
group by t.bigint_col
limit 10
---- PLAN
PLAN-ROOT SINK
|
05:AGGREGATE [FINALIZE]
| output: count(int_col), count:merge(smallint_col)
| group by: t.bigint_col
| limit: 10
| row-size=24B cardinality=10
|
04:AGGREGATE
| output: count(smallint_col)
| group by: bigint_col, int_col
| row-size=20B cardinality=400
|
03:AGGREGATE [FINALIZE]
| group by: id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col, timestamp_col, year, month
| row-size=89B cardinality=7.40K
|
00:UNION
| pass-through-operands: all
| row-size=89B cardinality=7.40K
|
|--02:SCAN HDFS [functional.alltypessmall]
| HDFS partitions=4/4 files=4 size=6.32KB
| row-size=89B cardinality=100
|
01:SCAN HDFS [functional.alltypes]
HDFS partitions=24/24 files=24 size=478.45KB
row-size=89B cardinality=7.30K
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
12:EXCHANGE [UNPARTITIONED]
| limit: 10
|
11:AGGREGATE [FINALIZE]
| output: count:merge(int_col), count:merge(smallint_col)
| group by: t.bigint_col
| limit: 10
| row-size=24B cardinality=10
|
10:EXCHANGE [HASH(t.bigint_col)]
|
05:AGGREGATE [STREAMING]
| output: count(int_col), count:merge(smallint_col)
| group by: t.bigint_col
| row-size=24B cardinality=60
|
09:AGGREGATE
| output: count:merge(smallint_col)
| group by: t.bigint_col, int_col
| row-size=20B cardinality=400
|
08:EXCHANGE [HASH(t.bigint_col,int_col)]
|
04:AGGREGATE [STREAMING]
| output: count(smallint_col)
| group by: bigint_col, int_col
| row-size=20B cardinality=1.20K
|
07:AGGREGATE [FINALIZE]
| group by: id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col, timestamp_col, year, month
| row-size=89B cardinality=7.40K
|
06:EXCHANGE [HASH(id,bool_col,tinyint_col,smallint_col,int_col,bigint_col,float_col,double_col,date_string_col,string_col,timestamp_col,year,month)]
|
03:AGGREGATE [STREAMING]
| group by: id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col, timestamp_col, year, month
| row-size=89B cardinality=7.40K
|
00:UNION
| pass-through-operands: all
| row-size=89B cardinality=7.40K
|
|--02:SCAN HDFS [functional.alltypessmall]
| HDFS partitions=4/4 files=4 size=6.32KB
| row-size=89B cardinality=100
|
01:SCAN HDFS [functional.alltypes]
HDFS partitions=24/24 files=24 size=478.45KB
row-size=89B cardinality=7.30K
====
# Mixed distinct and non-distinct agg with intermediate type different from input type
# Regression test for IMPALA-5251 to exercise validateMergeAggFn() in FunctionCallExpr.
select avg(l_quantity), ndv(l_discount), count(distinct l_partkey)
from tpch_parquet.lineitem;
---- PLAN
PLAN-ROOT SINK
|
02:AGGREGATE [FINALIZE]
| output: count(l_partkey), avg:merge(l_quantity), ndv:merge(l_discount)
| row-size=24B cardinality=1
|
01:AGGREGATE
| output: avg(l_quantity), ndv(l_discount)
| group by: l_partkey
| row-size=24B cardinality=200.52K
|
00:SCAN HDFS [tpch_parquet.lineitem]
HDFS partitions=1/1 files=3 size=193.99MB
row-size=24B cardinality=6.00M
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
06:AGGREGATE [FINALIZE]
| output: count:merge(l_partkey), avg:merge(l_quantity), ndv:merge(l_discount)
| row-size=24B cardinality=1
|
05:EXCHANGE [UNPARTITIONED]
|
02:AGGREGATE
| output: count(l_partkey), avg:merge(l_quantity), ndv:merge(l_discount)
| row-size=24B cardinality=1
|
04:AGGREGATE
| output: avg:merge(l_quantity), ndv:merge(l_discount)
| group by: l_partkey
| row-size=24B cardinality=200.52K
|
03:EXCHANGE [HASH(l_partkey)]
|
01:AGGREGATE [STREAMING]
| output: avg(l_quantity), ndv(l_discount)
| group by: l_partkey
| row-size=24B cardinality=601.52K
|
00:SCAN HDFS [tpch_parquet.lineitem]
HDFS partitions=1/1 files=3 size=193.99MB
row-size=24B cardinality=6.00M
====
# test that aggregations are not placed below an unpartitioned exchange with a limit
select count(*) from (select * from functional.alltypes limit 10) t
---- PLAN
PLAN-ROOT SINK
|
01:AGGREGATE [FINALIZE]
| output: count(*)
| row-size=8B cardinality=1
|
00:SCAN HDFS [functional.alltypes]
HDFS partitions=24/24 files=24 size=478.45KB
limit: 10
row-size=0B cardinality=10
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
01:AGGREGATE [FINALIZE]
| output: count(*)
| row-size=8B cardinality=1
|
02:EXCHANGE [UNPARTITIONED]
| limit: 10
|
00:SCAN HDFS [functional.alltypes]
HDFS partitions=24/24 files=24 size=478.45KB
limit: 10
row-size=0B cardinality=10
====
# test that aggregations are not placed below an unpartitioned exchange with a limit
select count(*) from
(select * from functional.alltypes
union all
(select * from functional.alltypessmall) limit 10) t
---- PLAN
PLAN-ROOT SINK
|
03:AGGREGATE [FINALIZE]
| output: count(*)
| row-size=8B cardinality=1
|
00:UNION
| pass-through-operands: all
| limit: 10
| row-size=0B cardinality=10
|
|--02:SCAN HDFS [functional.alltypessmall]
| HDFS partitions=4/4 files=4 size=6.32KB
| row-size=0B cardinality=100
|
01:SCAN HDFS [functional.alltypes]
HDFS partitions=24/24 files=24 size=478.45KB
row-size=0B cardinality=7.30K
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
03:AGGREGATE [FINALIZE]
| output: count(*)
| row-size=8B cardinality=1
|
04:EXCHANGE [UNPARTITIONED]
| limit: 10
|
00:UNION
| pass-through-operands: all
| limit: 10
| row-size=0B cardinality=10
|
|--02:SCAN HDFS [functional.alltypessmall]
| HDFS partitions=4/4 files=4 size=6.32KB
| row-size=0B cardinality=100
|
01:SCAN HDFS [functional.alltypes]
HDFS partitions=24/24 files=24 size=478.45KB
row-size=0B cardinality=7.30K
====
# test that limits are applied at the top-level merge aggregation node for non-grouping
# distinct aggregation (IMPALA-1802)
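# The inner limit 10 is applied at the top-level merge aggregation (11:AGGREGATE in
# the distributed plan), and the outer limits 2 and 1 collapse to "limit: 1" on the
# final aggregation.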
select * from (
select count(distinct cnt) from
(select count(distinct t1.id) as cnt
from functional.alltypesagg t1 join functional.alltypestiny t2 on t1.id = t2.id
limit 10) t
limit 2) v
limit 1
---- PLAN
PLAN-ROOT SINK
|
06:AGGREGATE [FINALIZE]
| output: count(cnt)
| limit: 1
| row-size=8B cardinality=1
|
05:AGGREGATE
| group by: count(t1.id)
| row-size=8B cardinality=1
|
04:AGGREGATE [FINALIZE]
| output: count(t1.id)
| limit: 10
| row-size=8B cardinality=1
|
03:AGGREGATE
| group by: t1.id
| row-size=4B cardinality=9
|
02:HASH JOIN [INNER JOIN]
| hash predicates: t1.id = t2.id
| runtime filters: RF000 <- t2.id
| row-size=8B cardinality=9
|
|--01:SCAN HDFS [functional.alltypestiny t2]
| HDFS partitions=4/4 files=4 size=460B
| row-size=4B cardinality=8
|
00:SCAN HDFS [functional.alltypesagg t1]
HDFS partitions=11/11 files=11 size=814.73KB
runtime filters: RF000 -> t1.id
row-size=4B cardinality=11.00K
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
06:AGGREGATE [FINALIZE]
| output: count(cnt)
| limit: 1
| row-size=8B cardinality=1
|
05:AGGREGATE
| group by: count(t1.id)
| row-size=8B cardinality=1
|
11:AGGREGATE [FINALIZE]
| output: count:merge(t1.id)
| limit: 10
| row-size=8B cardinality=1
|
10:EXCHANGE [UNPARTITIONED]
|
04:AGGREGATE
| output: count(t1.id)
| row-size=8B cardinality=1
|
09:AGGREGATE
| group by: t1.id
| row-size=4B cardinality=9
|
08:EXCHANGE [HASH(t1.id)]
|
03:AGGREGATE [STREAMING]
| group by: t1.id
| row-size=4B cardinality=9
|
02:HASH JOIN [INNER JOIN, BROADCAST]
| hash predicates: t1.id = t2.id
| runtime filters: RF000 <- t2.id
| row-size=8B cardinality=9
|
|--07:EXCHANGE [BROADCAST]
| |
| 01:SCAN HDFS [functional.alltypestiny t2]
| HDFS partitions=4/4 files=4 size=460B
| row-size=4B cardinality=8
|
00:SCAN HDFS [functional.alltypesagg t1]
HDFS partitions=11/11 files=11 size=814.73KB
runtime filters: RF000 -> t1.id
row-size=4B cardinality=11.00K
====
# IMPALA-2089: Tests correct elimination of redundant predicates.
# The equivalences between inline-view slots are enforced inside the inline-view plan.
# Equivalences between simple grouping slots (with SlotRef grouping exprs) are enforced
# at the scan, and equivalences between grouping slots with complex grouping exprs are
# enforced at the aggregation.
# a, b, c, d are in the same equivalence class and some predicates are redundant.
select * from
(select tinyint_col a, smallint_col b, int_col + int_col c, coalesce(bigint_col, year) d
from functional.alltypes
group by 1, 2, 3, 4) v
where v.a = v.b and v.b = v.c and v.c = v.d and v.a = v.c and v.a = v.d
---- PLAN
PLAN-ROOT SINK
|
01:AGGREGATE [FINALIZE]
| group by: tinyint_col, smallint_col, int_col + int_col, coalesce(bigint_col, `year`)
| having: int_col + int_col = coalesce(bigint_col, `year`), smallint_col = int_col + int_col
| row-size=19B cardinality=73
|
00:SCAN HDFS [functional.alltypes]
HDFS partitions=24/24 files=24 size=478.45KB
predicates: functional.alltypes.tinyint_col = functional.alltypes.smallint_col
row-size=19B cardinality=730
====
# IMPALA-1917: Test NULL literals inside inline view with grouping aggregation.
select cnt from
(select bool_col, count(*) cnt, cast(NULL as int) as x, cast(NULL as int) as y
from functional.alltypestiny
group by bool_col, x) v
---- PLAN
PLAN-ROOT SINK
|
01:AGGREGATE [FINALIZE]
| output: count(*)
| group by: bool_col, CAST(NULL AS INT)
| row-size=13B cardinality=2
|
00:SCAN HDFS [functional.alltypestiny]
HDFS partitions=4/4 files=4 size=460B
row-size=1B cardinality=8
====
# IMPALA-1917: Test NULL literals inside inline view with grouping aggregation.
select cnt from
(select bool_col, count(distinct int_col) cnt, NULL as x, NULL as y
from functional.alltypestiny
group by bool_col, x) v
---- PLAN
PLAN-ROOT SINK
|
02:AGGREGATE [FINALIZE]
| output: count(int_col)
| group by: bool_col, NULL
| row-size=10B cardinality=2
|
01:AGGREGATE
| group by: bool_col, NULL, int_col
| row-size=6B cardinality=4
|
00:SCAN HDFS [functional.alltypestiny]
HDFS partitions=4/4 files=4 size=460B
row-size=5B cardinality=8
====
# test simple group_concat with distinct
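# group_concat(distinct) uses the same two-phase rewrite as other distinct
# aggregates: the first aggregation groups by string_col to deduplicate, and the
# second concatenates the distinct values.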
select group_concat(distinct string_col) from functional.alltypesagg
---- PLAN
PLAN-ROOT SINK
|
02:AGGREGATE [FINALIZE]
| output: group_concat(string_col)
| row-size=12B cardinality=1
|
01:AGGREGATE
| group by: string_col
| row-size=15B cardinality=963
|
00:SCAN HDFS [functional.alltypesagg]
HDFS partitions=11/11 files=11 size=814.73KB
row-size=15B cardinality=11.00K
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
06:AGGREGATE [FINALIZE]
| output: group_concat:merge(string_col)
| row-size=12B cardinality=1
|
05:EXCHANGE [UNPARTITIONED]
|
02:AGGREGATE
| output: group_concat(string_col)
| row-size=12B cardinality=1
|
04:AGGREGATE
| group by: string_col
| row-size=15B cardinality=963
|
03:EXCHANGE [HASH(string_col)]
|
01:AGGREGATE [STREAMING]
| group by: string_col
| row-size=15B cardinality=2.83K
|
00:SCAN HDFS [functional.alltypesagg]
HDFS partitions=11/11 files=11 size=814.73KB
row-size=15B cardinality=11.00K
====
# test group_concat and a group by
select day, group_concat(distinct string_col)
from (select * from functional.alltypesagg where id % 100 = day order by id limit 99999) a
group by day
---- PLAN
PLAN-ROOT SINK
|
03:AGGREGATE [FINALIZE]
| output: group_concat(string_col)
| group by: `day`
| row-size=16B cardinality=11
|
02:AGGREGATE
| group by: day, string_col
| row-size=19B cardinality=1.10K
|
01:TOP-N [LIMIT=99999]
| order by: id ASC
| row-size=23B cardinality=1.10K
|
00:SCAN HDFS [functional.alltypesagg]
HDFS partitions=11/11 files=11 size=814.73KB
predicates: `day` = id % 100
row-size=23B cardinality=1.10K
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
03:AGGREGATE [FINALIZE]
| output: group_concat(string_col)
| group by: `day`
| row-size=16B cardinality=11
|
02:AGGREGATE
| group by: day, string_col
| row-size=19B cardinality=1.10K
|
04:MERGING-EXCHANGE [UNPARTITIONED]
| order by: id ASC
| limit: 99999
|
01:TOP-N [LIMIT=99999]
| order by: id ASC
| row-size=23B cardinality=1.10K
|
00:SCAN HDFS [functional.alltypesagg]
HDFS partitions=11/11 files=11 size=814.73KB
predicates: `day` = id % 100
row-size=23B cardinality=1.10K
====
# test group_concat with distinct together with another distinct aggregate function
select count(distinct cast(timestamp_col as string)),
group_concat(distinct cast(timestamp_col as string))
from functional.alltypesagg group by year
---- PLAN
PLAN-ROOT SINK
|
02:AGGREGATE [FINALIZE]
| output: count(CAST(timestamp_col AS STRING)), group_concat(CAST(timestamp_col AS STRING))
| group by: `year`
| row-size=24B cardinality=1
|
01:AGGREGATE
| group by: `year`, CAST(timestamp_col AS STRING)
| row-size=20B cardinality=10.21K
|
00:SCAN HDFS [functional.alltypesagg]
HDFS partitions=11/11 files=11 size=814.73KB
row-size=20B cardinality=11.00K
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
07:EXCHANGE [UNPARTITIONED]
|
06:AGGREGATE [FINALIZE]
| output: count:merge(CAST(timestamp_col AS STRING)), group_concat:merge(CAST(timestamp_col AS STRING))
| group by: `year`
| row-size=24B cardinality=1
|
05:EXCHANGE [HASH(`year`)]
|
02:AGGREGATE [STREAMING]
| output: count(CAST(timestamp_col AS STRING)), group_concat(CAST(timestamp_col AS STRING))
| group by: `year`
| row-size=24B cardinality=3
|
04:AGGREGATE
| group by: `year`, CAST(timestamp_col AS STRING)
| row-size=20B cardinality=9.24K
|
03:EXCHANGE [HASH(`year`,CAST(timestamp_col AS STRING))]
|
01:AGGREGATE [STREAMING]
| group by: `year`, CAST(timestamp_col AS STRING)
| row-size=20B cardinality=9.24K
|
00:SCAN HDFS [functional.alltypesagg]
HDFS partitions=11/11 files=11 size=814.73KB
row-size=20B cardinality=11.00K
====
# test group_concat distinct with other non-distinct aggregate functions
select group_concat(distinct string_col), count(*) from functional.alltypesagg
---- PLAN
PLAN-ROOT SINK
|
02:AGGREGATE [FINALIZE]
| output: group_concat(string_col), count:merge(*)
| row-size=20B cardinality=1
|
01:AGGREGATE
| output: count(*)
| group by: string_col
| row-size=23B cardinality=963
|
00:SCAN HDFS [functional.alltypesagg]
HDFS partitions=11/11 files=11 size=814.73KB
row-size=15B cardinality=11.00K
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
06:AGGREGATE [FINALIZE]
| output: group_concat:merge(string_col), count:merge(*)
| row-size=20B cardinality=1
|
05:EXCHANGE [UNPARTITIONED]
|
02:AGGREGATE
| output: group_concat(string_col), count:merge(*)
| row-size=20B cardinality=1
|
04:AGGREGATE
| output: count:merge(*)
| group by: string_col
| row-size=23B cardinality=963
|
03:EXCHANGE [HASH(string_col)]
|
01:AGGREGATE [STREAMING]
| output: count(*)
| group by: string_col
| row-size=23B cardinality=2.83K
|
00:SCAN HDFS [functional.alltypesagg]
HDFS partitions=11/11 files=11 size=814.73KB
row-size=15B cardinality=11.00K
====
# test group_concat distinct with other aggregate functions, with a custom separator
select group_concat(distinct string_col, '-'), sum(int_col), count(distinct string_col)
from functional.alltypesagg
---- PLAN
PLAN-ROOT SINK
|
02:AGGREGATE [FINALIZE]
| output: group_concat(string_col, '-'), count(string_col), sum:merge(int_col)
| row-size=28B cardinality=1
|
01:AGGREGATE
| output: sum(int_col)
| group by: string_col
| row-size=23B cardinality=963
|
00:SCAN HDFS [functional.alltypesagg]
HDFS partitions=11/11 files=11 size=814.73KB
row-size=19B cardinality=11.00K
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
06:AGGREGATE [FINALIZE]
| output: group_concat:merge(string_col, '-'), count:merge(string_col), sum:merge(int_col)
| row-size=28B cardinality=1
|
05:EXCHANGE [UNPARTITIONED]
|
02:AGGREGATE
| output: group_concat(string_col, '-'), count(string_col), sum:merge(int_col)
| row-size=28B cardinality=1
|
04:AGGREGATE
| output: sum:merge(int_col)
| group by: string_col
| row-size=23B cardinality=963
|
03:EXCHANGE [HASH(string_col)]
|
01:AGGREGATE [STREAMING]
| output: sum(int_col)
| group by: string_col
| row-size=23B cardinality=2.83K
|
00:SCAN HDFS [functional.alltypesagg]
HDFS partitions=11/11 files=11 size=814.73KB
row-size=19B cardinality=11.00K
====
# test group_concat distinct with other aggregate functions, with a custom separator
# and a group by
select month, year, count(*), count(distinct date_string_col),
group_concat(distinct date_string_col, '-') from functional.alltypesagg
group by month, year
---- PLAN
PLAN-ROOT SINK
|
02:AGGREGATE [FINALIZE]
| output: count(date_string_col), group_concat(date_string_col, '-'), count:merge(*)
| group by: `month`, `year`
| row-size=36B cardinality=1
|
01:AGGREGATE
| output: count(*)
| group by: `month`, `year`, date_string_col
| row-size=36B cardinality=10
|
00:SCAN HDFS [functional.alltypesagg]
HDFS partitions=11/11 files=11 size=814.73KB
row-size=28B cardinality=11.00K
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
07:EXCHANGE [UNPARTITIONED]
|
06:AGGREGATE [FINALIZE]
| output: count:merge(date_string_col), group_concat:merge(date_string_col, '-'), count:merge(*)
| group by: `month`, `year`
| row-size=36B cardinality=1
|
05:EXCHANGE [HASH(`month`,`year`)]
|
02:AGGREGATE [STREAMING]
| output: count(date_string_col), group_concat(date_string_col, '-'), count:merge(*)
| group by: `month`, `year`
| row-size=36B cardinality=3
|
04:AGGREGATE
| output: count:merge(*)
| group by: `month`, `year`, date_string_col
| row-size=36B cardinality=10
|
03:EXCHANGE [HASH(`month`,`year`,date_string_col)]
|
01:AGGREGATE [STREAMING]
| output: count(*)
| group by: `month`, `year`, date_string_col
| row-size=36B cardinality=30
|
00:SCAN HDFS [functional.alltypesagg]
HDFS partitions=11/11 files=11 size=814.73KB
row-size=28B cardinality=11.00K
====
# test multiple group_concat distinct, each with a different separator
select group_concat(distinct string_col), group_concat(distinct string_col, '-'),
group_concat(distinct string_col, '---') from functional.alltypesagg
---- PLAN
PLAN-ROOT SINK
|
02:AGGREGATE [FINALIZE]
| output: group_concat(string_col), group_concat(string_col, '-'), group_concat(string_col, '---')
| row-size=36B cardinality=1
|
01:AGGREGATE
| group by: string_col
| row-size=15B cardinality=963
|
00:SCAN HDFS [functional.alltypesagg]
HDFS partitions=11/11 files=11 size=814.73KB
row-size=15B cardinality=11.00K
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
06:AGGREGATE [FINALIZE]
| output: group_concat:merge(string_col), group_concat:merge(string_col, '-'), group_concat:merge(string_col, '---')
| row-size=36B cardinality=1
|
05:EXCHANGE [UNPARTITIONED]
|
02:AGGREGATE
| output: group_concat(string_col), group_concat(string_col, '-'), group_concat(string_col, '---')
| row-size=36B cardinality=1
|
04:AGGREGATE
| group by: string_col
| row-size=15B cardinality=963
|
03:EXCHANGE [HASH(string_col)]
|
01:AGGREGATE [STREAMING]
| group by: string_col
| row-size=15B cardinality=2.83K
|
00:SCAN HDFS [functional.alltypesagg]
HDFS partitions=11/11 files=11 size=814.73KB
row-size=15B cardinality=11.00K
====
# IMPALA-852: Aggregation only in the HAVING clause.
select 1 from functional.alltypestiny having count(*) > 0
---- PLAN
PLAN-ROOT SINK
|
01:AGGREGATE [FINALIZE]
| output: count(*)
| having: count(*) > 0
| row-size=8B cardinality=1
|
00:SCAN HDFS [functional.alltypestiny]
HDFS partitions=4/4 files=4 size=460B
row-size=0B cardinality=8
====
# Grouping aggregation where input is partitioned on grouping expr.
# Planner should not redundantly repartition the data that was already partitioned on
# the required key by the join.
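# The join executes PARTITIONED on HASH(c_custkey), which matches the grouping expr,
# so 03:AGGREGATE runs in the join fragment with no exchange in between; only the
# final unpartitioned exchange remains.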
select straight_join c_custkey, count(*)
from tpch_parquet.customer inner join [shuffle] tpch_parquet.orders on c_custkey = o_custkey
where c_nationkey = 16
group by 1
having count(*) < 150000
limit 1000000
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
06:EXCHANGE [UNPARTITIONED]
| limit: 1000000
|
03:AGGREGATE [FINALIZE]
| output: count(*)
| group by: c_custkey
| having: count(*) < 150000
| limit: 1000000
| row-size=16B cardinality=9.15K
|
02:HASH JOIN [INNER JOIN, PARTITIONED]
| hash predicates: c_custkey = o_custkey
| runtime filters: RF000 <- o_custkey
| row-size=18B cardinality=91.47K
|
|--05:EXCHANGE [HASH(o_custkey)]
| |
| 01:SCAN HDFS [tpch_parquet.orders]
| HDFS partitions=1/1 files=2 size=54.21MB
| row-size=8B cardinality=1.50M
|
04:EXCHANGE [HASH(c_custkey)]
|
00:SCAN HDFS [tpch_parquet.customer]
HDFS partitions=1/1 files=1 size=12.34MB
predicates: c_nationkey = 16
runtime filters: RF000 -> c_custkey
row-size=10B cardinality=6.00K
====
# Distinct aggregation where input is partitioned on distinct expr.
# Planner should not redundantly repartition the data that was already partitioned on
# the required key by the join.
select col from (
select straight_join count(distinct c_custkey) col
from tpch_parquet.orders inner join [shuffle] tpch_parquet.customer on c_custkey = o_custkey) v
where col > 50
limit 50
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
08:AGGREGATE [FINALIZE]
| output: count:merge(c_custkey)
| having: count(c_custkey) > 50
| limit: 50
| row-size=8B cardinality=1
|
07:EXCHANGE [UNPARTITIONED]
|
04:AGGREGATE
| output: count(c_custkey)
| row-size=8B cardinality=1
|
03:AGGREGATE
| group by: c_custkey
| row-size=8B cardinality=150.00K
|
02:HASH JOIN [INNER JOIN, PARTITIONED]
| hash predicates: o_custkey = c_custkey
| runtime filters: RF000 <- c_custkey
| row-size=16B cardinality=1.50M
|
|--06:EXCHANGE [HASH(c_custkey)]
| |
| 01:SCAN HDFS [tpch_parquet.customer]
| HDFS partitions=1/1 files=1 size=12.34MB
| row-size=8B cardinality=150.00K
|
05:EXCHANGE [HASH(o_custkey)]
|
00:SCAN HDFS [tpch_parquet.orders]
HDFS partitions=1/1 files=2 size=54.21MB
runtime filters: RF000 -> o_custkey
row-size=8B cardinality=1.50M
====
# Distinct grouping aggregation where input is partitioned on distinct and grouping exprs.
# Planner should not redundantly repartition the data that was already partitioned on
# the required key by the join.
select straight_join c_custkey, count(distinct c_custkey)
from tpch_parquet.orders inner join [shuffle] tpch_parquet.customer on c_custkey = o_custkey
group by 1
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
09:EXCHANGE [UNPARTITIONED]
|
08:AGGREGATE [FINALIZE]
| output: count:merge(c_custkey)
| group by: c_custkey
| row-size=16B cardinality=150.00K
|
07:EXCHANGE [HASH(c_custkey)]
|
04:AGGREGATE [STREAMING]
| output: count(c_custkey)
| group by: c_custkey
| row-size=16B cardinality=150.00K
|
03:AGGREGATE
| group by: c_custkey, c_custkey
| row-size=16B cardinality=150.00K
|
02:HASH JOIN [INNER JOIN, PARTITIONED]
| hash predicates: o_custkey = c_custkey
| runtime filters: RF000 <- c_custkey
| row-size=16B cardinality=1.50M
|
|--06:EXCHANGE [HASH(c_custkey)]
| |
| 01:SCAN HDFS [tpch_parquet.customer]
| HDFS partitions=1/1 files=1 size=12.34MB
| row-size=8B cardinality=150.00K
|
05:EXCHANGE [HASH(o_custkey)]
|
00:SCAN HDFS [tpch_parquet.orders]
HDFS partitions=1/1 files=2 size=54.21MB
runtime filters: RF000 -> o_custkey
row-size=8B cardinality=1.50M
====
# Complex aggregation when two joins and an agg end up in the same fragment.
select l_orderkey, l_returnflag, count(*) from (
select straight_join *
from tpch_parquet.lineitem
inner join [shuffle] tpch_parquet.orders
on l_orderkey = o_orderkey and l_returnflag = o_clerk
inner join [broadcast] tpch_parquet.customer
on o_custkey = c_custkey and c_phone = o_comment
) v
group by 1, 2
having count(*) > 10
limit 10
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
09:EXCHANGE [UNPARTITIONED]
| limit: 10
|
05:AGGREGATE [FINALIZE]
| output: count(*)
| group by: tpch_parquet.lineitem.l_orderkey, tpch_parquet.lineitem.l_returnflag
| having: count(*) > 10
| limit: 10
| row-size=29B cardinality=10
|
04:HASH JOIN [INNER JOIN, BROADCAST]
| hash predicates: o_custkey = c_custkey, o_comment = c_phone
| runtime filters: RF000 <- c_custkey, RF001 <- c_phone
| row-size=160B cardinality=607.19K
|
|--08:EXCHANGE [BROADCAST]
| |
| 02:SCAN HDFS [tpch_parquet.customer]
| HDFS partitions=1/1 files=1 size=12.34MB
| row-size=35B cardinality=150.00K
|
03:HASH JOIN [INNER JOIN, PARTITIONED]
| hash predicates: l_orderkey = o_orderkey, l_returnflag = o_clerk
| runtime filters: RF004 <- o_orderkey, RF005 <- o_clerk
| row-size=125B cardinality=5.76M
|
|--07:EXCHANGE [HASH(o_orderkey,o_clerk)]
| |
| 01:SCAN HDFS [tpch_parquet.orders]
| HDFS partitions=1/1 files=2 size=54.21MB
| runtime filters: RF000 -> o_custkey, RF001 -> o_comment
| row-size=104B cardinality=1.50M
|
06:EXCHANGE [HASH(l_orderkey,l_returnflag)]
|
00:SCAN HDFS [tpch_parquet.lineitem]
HDFS partitions=1/1 files=3 size=193.99MB
runtime filters: RF004 -> l_orderkey, RF005 -> l_returnflag
row-size=21B cardinality=5.76M(filtered from 6.00M)
====
# IMPALA-4263: Grouping agg needs a merge step because the grouping exprs reference a
# tuple that is made nullable in the join fragment.
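# The streaming pre-aggregation (03) runs in the join fragment, where unmatched t1
# rows are NULL-extended on every node, so the same t2.id group can be produced on
# multiple nodes and a repartition on HASH(t2.id) plus a merge step (07) is required.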
select /* +straight_join */ t2.id, count(*)
from functional.alltypes t1
left outer join /* +shuffle */ functional.alltypessmall t2
on t1.id = t2.id
group by t2.id
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
08:EXCHANGE [UNPARTITIONED]
|
07:AGGREGATE [FINALIZE]
| output: count:merge(*)
| group by: t2.id
| row-size=12B cardinality=99
|
06:EXCHANGE [HASH(t2.id)]
|
03:AGGREGATE [STREAMING]
| output: count(*)
| group by: t2.id
| row-size=12B cardinality=297
|
02:HASH JOIN [LEFT OUTER JOIN, PARTITIONED]
| hash predicates: t1.id = t2.id
| row-size=8B cardinality=7.30K
|
|--05:EXCHANGE [HASH(t2.id)]
| |
| 01:SCAN HDFS [functional.alltypessmall t2]
| HDFS partitions=4/4 files=4 size=6.32KB
| row-size=4B cardinality=100
|
04:EXCHANGE [HASH(t1.id)]
|
00:SCAN HDFS [functional.alltypes t1]
HDFS partitions=24/24 files=24 size=478.45KB
row-size=4B cardinality=7.30K
====
# IMPALA-4263: Grouping agg is placed in the join fragment and has no merge step.
select /* +straight_join */ t1.id, count(*)
from functional.alltypes t1
left outer join /* +shuffle */ functional.alltypessmall t2
on t1.id = t2.id
group by t1.id
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
06:EXCHANGE [UNPARTITIONED]
|
03:AGGREGATE [FINALIZE]
| output: count(*)
| group by: t1.id
| row-size=12B cardinality=7.30K
|
02:HASH JOIN [LEFT OUTER JOIN, PARTITIONED]
| hash predicates: t1.id = t2.id
| row-size=8B cardinality=7.30K
|
|--05:EXCHANGE [HASH(t2.id)]
| |
| 01:SCAN HDFS [functional.alltypessmall t2]
| HDFS partitions=4/4 files=4 size=6.32KB
| row-size=4B cardinality=100
|
04:EXCHANGE [HASH(t1.id)]
|
00:SCAN HDFS [functional.alltypes t1]
HDFS partitions=24/24 files=24 size=478.45KB
row-size=4B cardinality=7.30K
====
# IMPALA-4263: Grouping agg is placed in the second join fragment and has no merge step.
# The grouping exprs reference a nullable tuple (t2), but that tuple is made nullable in
# the first join fragment, so it's correct to place the aggregation in the second
# join fragment without a merge step.
select /* +straight_join */ t2.id, count(*)
from functional.alltypes t1
left outer join /* +shuffle */ functional.alltypessmall t2
on t1.int_col = t2.int_col
left outer join /* +shuffle */ functional.alltypestiny t3
on t2.id = t3.id
group by t2.id
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
10:EXCHANGE [UNPARTITIONED]
|
05:AGGREGATE [FINALIZE]
| output: count(*)
| group by: t2.id
| row-size=12B cardinality=99
|
04:HASH JOIN [LEFT OUTER JOIN, PARTITIONED]
| hash predicates: t2.id = t3.id
| row-size=16B cardinality=73.00K
|
|--09:EXCHANGE [HASH(t3.id)]
| |
| 02:SCAN HDFS [functional.alltypestiny t3]
| HDFS partitions=4/4 files=4 size=460B
| row-size=4B cardinality=8
|
08:EXCHANGE [HASH(t2.id)]
|
03:HASH JOIN [LEFT OUTER JOIN, PARTITIONED]
| hash predicates: t1.int_col = t2.int_col
| row-size=12B cardinality=73.00K
|
|--07:EXCHANGE [HASH(t2.int_col)]
| |
| 01:SCAN HDFS [functional.alltypessmall t2]
| HDFS partitions=4/4 files=4 size=6.32KB
| row-size=8B cardinality=100
|
06:EXCHANGE [HASH(t1.int_col)]
|
00:SCAN HDFS [functional.alltypes t1]
HDFS partitions=24/24 files=24 size=478.45KB
row-size=4B cardinality=7.30K
====
# IMPALA-10096: when a GROUP BY ordinal resolves to a constant int, keep the
# original resolution instead of re-interpreting the constant as a new ordinal
select 13, id, count(1) from functional.dimtbl group by 1, 2
---- PLAN
PLAN-ROOT SINK
|
01:AGGREGATE [FINALIZE]
| output: count(*)
| group by: 13, id
| row-size=17B cardinality=10
|
00:SCAN HDFS [functional.dimtbl]
HDFS partitions=1/1 files=1 size=171B
row-size=8B cardinality=10
====
# IMPALA-10096: when a GROUP BY ordinal resolves to a constant int, keep the
# original resolution instead of re-interpreting the constant as a new ordinal
select -1, id, count(1) from functional.dimtbl group by 1, 2
---- PLAN
PLAN-ROOT SINK
|
01:AGGREGATE [FINALIZE]
| output: count(*)
| group by: -1, id
| row-size=17B cardinality=10
|
00:SCAN HDFS [functional.dimtbl]
HDFS partitions=1/1 files=1 size=171B
row-size=8B cardinality=10
====
# IMPALA-10096: when a GROUP BY ordinal resolves to a constant int, keep the
# original resolution instead of re-interpreting the constant as a new ordinal
select 2, id, count(1) from functional.dimtbl group by 1, 2
---- PLAN
PLAN-ROOT SINK
|
01:AGGREGATE [FINALIZE]
| output: count(*)
| group by: 2, id
| row-size=17B cardinality=10
|
00:SCAN HDFS [functional.dimtbl]
HDFS partitions=1/1 files=1 size=171B
row-size=8B cardinality=10
====
# IMPALA-10096: when a GROUP BY ordinal resolves to a constant int, keep the
# original resolution instead of re-interpreting the constant as a new ordinal
select 2 + 1, id, count(1) from functional.dimtbl group by 1, 2
---- PLAN
PLAN-ROOT SINK
|
01:AGGREGATE [FINALIZE]
| output: count(*)
| group by: 3, id
| row-size=17B cardinality=10
|
00:SCAN HDFS [functional.dimtbl]
HDFS partitions=1/1 files=1 size=171B
row-size=8B cardinality=10
====
# IMPALA-10865: A GROUP BY expr that references a column alias caused errors
# in the re-analyze phase.
SELECT ss_item_sk ss_item_sk_group, ss_item_sk+300 ss_item_sk,
count(ss_ticket_number)
FROM tpcds.store_sales a
WHERE ss_sold_date_sk > cast('245263' AS INT)
GROUP BY ss_item_sk_group,
ss_item_sk
---- PLAN
PLAN-ROOT SINK
|
01:AGGREGATE [FINALIZE]
| output: count(ss_ticket_number)
| group by: ss_item_sk, ss_item_sk + 300
| row-size=24B cardinality=2.75M
|
00:SCAN HDFS [tpcds.store_sales a]
partition predicates: ss_sold_date_sk > 245263
HDFS partitions=1823/1824 files=1823 size=336.51MB
row-size=16B cardinality=2.75M
---- PARALLELPLANS
PLAN-ROOT SINK
|
04:EXCHANGE [UNPARTITIONED]
|
03:AGGREGATE [FINALIZE]
| output: count:merge(ss_ticket_number)
| group by: ss_item_sk, ss_item_sk + 300
| row-size=24B cardinality=2.75M
|
02:EXCHANGE [HASH(ss_item_sk,ss_item_sk + 300)]
|
01:AGGREGATE [STREAMING]
| output: count(ss_ticket_number)
| group by: ss_item_sk, ss_item_sk + 300
| row-size=24B cardinality=2.75M
|
00:SCAN HDFS [tpcds.store_sales a]
partition predicates: ss_sold_date_sk > 245263
HDFS partitions=1823/1824 files=1823 size=336.51MB
row-size=16B cardinality=2.75M
====
# Baseline for the similar test cases below that do 2-phase aggregation.
# The query output is:
# +----+----+----+
# | bc | c1 | c2 |
# +----+----+----+
# | 0 | 12 | 1 |
# | 10 | 12 | 1 |
# | 20 | 12 | 1 |
# | 30 | 12 | 1 |
# | 40 | 12 | 1 |
# | 50 | 8 | 1 |
# | 60 | 8 | 1 |
# | 70 | 8 | 1 |
# | 80 | 8 | 1 |
# | 90 | 8 | 1 |
# +----+----+----+
select
bigint_col bc, count(smallint_col) c1, count(distinct int_col) c2
from functional.alltypessmall
group by bigint_col
order by bc;
---- PLAN
PLAN-ROOT SINK
|
03:SORT
| order by: bigint_col ASC
| row-size=24B cardinality=10
|
02:AGGREGATE [FINALIZE]
| output: count(int_col), count:merge(smallint_col)
| group by: bigint_col
| row-size=24B cardinality=10
|
01:AGGREGATE
| output: count(smallint_col)
| group by: bigint_col, int_col
| row-size=20B cardinality=100
|
00:SCAN HDFS [functional.alltypessmall]
HDFS partitions=4/4 files=4 size=6.32KB
row-size=14B cardinality=100
---- PARALLELPLANS
PLAN-ROOT SINK
|
08:MERGING-EXCHANGE [UNPARTITIONED]
| order by: bigint_col ASC
|
03:SORT
| order by: bigint_col ASC
| row-size=24B cardinality=10
|
07:AGGREGATE [FINALIZE]
| output: count:merge(int_col), count:merge(smallint_col)
| group by: bigint_col
| row-size=24B cardinality=10
|
06:EXCHANGE [HASH(bigint_col)]
|
02:AGGREGATE [STREAMING]
| output: count(int_col), count:merge(smallint_col)
| group by: bigint_col
| row-size=24B cardinality=40
|
05:AGGREGATE
| output: count:merge(smallint_col)
| group by: bigint_col, int_col
| row-size=20B cardinality=100
|
04:EXCHANGE [HASH(bigint_col,int_col)]
|
01:AGGREGATE [STREAMING]
| output: count(smallint_col)
| group by: bigint_col, int_col
| row-size=20B cardinality=100
|
00:SCAN HDFS [functional.alltypessmall]
HDFS partitions=4/4 files=4 size=6.32KB
row-size=14B cardinality=100
====
# Add limit 7.
select
bigint_col bc, count(smallint_col) c1, count(distinct int_col) c2
from functional.alltypessmall
group by bigint_col
order by bc
limit 7;
---- PLAN
PLAN-ROOT SINK
|
03:TOP-N [LIMIT=7]
| order by: bigint_col ASC
| row-size=24B cardinality=7
|
02:AGGREGATE [FINALIZE]
| output: count(int_col), count:merge(smallint_col)
| group by: bigint_col
| row-size=24B cardinality=10
|
01:AGGREGATE
| output: count(smallint_col)
| group by: bigint_col, int_col
| row-size=20B cardinality=100
|
00:SCAN HDFS [functional.alltypessmall]
HDFS partitions=4/4 files=4 size=6.32KB
row-size=14B cardinality=100
---- PARALLELPLANS
PLAN-ROOT SINK
|
08:MERGING-EXCHANGE [UNPARTITIONED]
| order by: bigint_col ASC
| limit: 7
|
03:TOP-N [LIMIT=7]
| order by: bigint_col ASC
| row-size=24B cardinality=7
|
07:AGGREGATE [FINALIZE]
| output: count:merge(int_col), count:merge(smallint_col)
| group by: bigint_col
| row-size=24B cardinality=10
|
06:EXCHANGE [HASH(bigint_col)]
|
02:AGGREGATE [STREAMING]
| output: count(int_col), count:merge(smallint_col)
| group by: bigint_col
| row-size=24B cardinality=40
|
05:AGGREGATE
| output: count:merge(smallint_col)
| group by: bigint_col, int_col
| row-size=20B cardinality=100
|
04:EXCHANGE [HASH(bigint_col,int_col)]
|
01:AGGREGATE [STREAMING]
| output: count(smallint_col)
| group by: bigint_col, int_col
| row-size=20B cardinality=100
|
00:SCAN HDFS [functional.alltypessmall]
HDFS partitions=4/4 files=4 size=6.32KB
row-size=14B cardinality=100
====
# Add equality HAVING predicate over grouping column.
# That HAVING predicate should be pushed down to the scan.
select
bigint_col bc, count(smallint_col) c1, count(distinct int_col) c2
from functional.alltypessmall
group by bigint_col
having bigint_col = 0
order by bc;
---- PLAN
PLAN-ROOT SINK
|
03:SORT
| order by: bigint_col ASC
| row-size=24B cardinality=10
|
02:AGGREGATE [FINALIZE]
| output: count(int_col), count:merge(smallint_col)
| group by: bigint_col
| row-size=24B cardinality=10
|
01:AGGREGATE
| output: count(smallint_col)
| group by: bigint_col, int_col
| row-size=20B cardinality=10
|
00:SCAN HDFS [functional.alltypessmall]
HDFS partitions=4/4 files=4 size=6.32KB
predicates: functional.alltypessmall.bigint_col = 0
row-size=14B cardinality=10
---- PARALLELPLANS
PLAN-ROOT SINK
|
08:MERGING-EXCHANGE [UNPARTITIONED]
| order by: bigint_col ASC
|
03:SORT
| order by: bigint_col ASC
| row-size=24B cardinality=10
|
07:AGGREGATE [FINALIZE]
| output: count:merge(int_col), count:merge(smallint_col)
| group by: bigint_col
| row-size=24B cardinality=10
|
06:EXCHANGE [HASH(bigint_col)]
|
02:AGGREGATE [STREAMING]
| output: count(int_col), count:merge(smallint_col)
| group by: bigint_col
| row-size=24B cardinality=10
|
05:AGGREGATE
| output: count:merge(smallint_col)
| group by: bigint_col, int_col
| row-size=20B cardinality=10
|
04:EXCHANGE [HASH(bigint_col,int_col)]
|
01:AGGREGATE [STREAMING]
| output: count(smallint_col)
| group by: bigint_col, int_col
| row-size=20B cardinality=10
|
00:SCAN HDFS [functional.alltypessmall]
HDFS partitions=4/4 files=4 size=6.32KB
predicates: functional.alltypessmall.bigint_col = 0
row-size=14B cardinality=10
====
# Add equality HAVING predicate over output expression.
# Actual cardinality of 02:AGGREGATE is 5.
# TODO: Apply the 10% default selectivity instead.
select
bigint_col bc, count(smallint_col) c1, count(distinct int_col) c2
from functional.alltypessmall
group by bigint_col
having count(smallint_col) = 8
order by bc;
---- PLAN
PLAN-ROOT SINK
|
03:SORT
| order by: bigint_col ASC
| row-size=24B cardinality=1
|
02:AGGREGATE [FINALIZE]
| output: count(int_col), count:merge(smallint_col)
| group by: bigint_col
| having: count(smallint_col) = 8
| row-size=24B cardinality=1
|
01:AGGREGATE
| output: count(smallint_col)
| group by: bigint_col, int_col
| row-size=20B cardinality=100
|
00:SCAN HDFS [functional.alltypessmall]
HDFS partitions=4/4 files=4 size=6.32KB
row-size=14B cardinality=100
---- PARALLELPLANS
PLAN-ROOT SINK
|
08:MERGING-EXCHANGE [UNPARTITIONED]
| order by: bigint_col ASC
|
03:SORT
| order by: bigint_col ASC
| row-size=24B cardinality=1
|
07:AGGREGATE [FINALIZE]
| output: count:merge(int_col), count:merge(smallint_col)
| group by: bigint_col
| having: count(smallint_col) = 8
| row-size=24B cardinality=1
|
06:EXCHANGE [HASH(bigint_col)]
|
02:AGGREGATE [STREAMING]
| output: count(int_col), count:merge(smallint_col)
| group by: bigint_col
| row-size=24B cardinality=40
|
05:AGGREGATE
| output: count:merge(smallint_col)
| group by: bigint_col, int_col
| row-size=20B cardinality=100
|
04:EXCHANGE [HASH(bigint_col,int_col)]
|
01:AGGREGATE [STREAMING]
| output: count(smallint_col)
| group by: bigint_col, int_col
| row-size=20B cardinality=100
|
00:SCAN HDFS [functional.alltypessmall]
HDFS partitions=4/4 files=4 size=6.32KB
row-size=14B cardinality=100
====
# Add in-list HAVING predicate over output expression.
# Actual cardinality of 02:AGGREGATE is 5.
# TODO: Apply the 10% default selectivity instead.
select
bigint_col bc, count(smallint_col) c1, count(distinct int_col) c2
from functional.alltypessmall
group by bigint_col
having count(smallint_col) in (7, 8, 9)
order by bc;
---- PLAN
PLAN-ROOT SINK
|
03:SORT
| order by: bigint_col ASC
| row-size=24B cardinality=3
|
02:AGGREGATE [FINALIZE]
| output: count(int_col), count:merge(smallint_col)
| group by: bigint_col
| having: count(smallint_col) IN (7, 8, 9)
| row-size=24B cardinality=3
|
01:AGGREGATE
| output: count(smallint_col)
| group by: bigint_col, int_col
| row-size=20B cardinality=100
|
00:SCAN HDFS [functional.alltypessmall]
HDFS partitions=4/4 files=4 size=6.32KB
row-size=14B cardinality=100
---- PARALLELPLANS
PLAN-ROOT SINK
|
08:MERGING-EXCHANGE [UNPARTITIONED]
| order by: bigint_col ASC
|
03:SORT
| order by: bigint_col ASC
| row-size=24B cardinality=3
|
07:AGGREGATE [FINALIZE]
| output: count:merge(int_col), count:merge(smallint_col)
| group by: bigint_col
| having: count(smallint_col) IN (7, 8, 9)
| row-size=24B cardinality=3
|
06:EXCHANGE [HASH(bigint_col)]
|
02:AGGREGATE [STREAMING]
| output: count(int_col), count:merge(smallint_col)
| group by: bigint_col
| row-size=24B cardinality=40
|
05:AGGREGATE
| output: count:merge(smallint_col)
| group by: bigint_col, int_col
| row-size=20B cardinality=100
|
04:EXCHANGE [HASH(bigint_col,int_col)]
|
01:AGGREGATE [STREAMING]
| output: count(smallint_col)
| group by: bigint_col, int_col
| row-size=20B cardinality=100
|
00:SCAN HDFS [functional.alltypessmall]
HDFS partitions=4/4 files=4 size=6.32KB
row-size=14B cardinality=100
====
# Aggregation across nested views. The NDVs of the aggregation columns are as follows:
# ss_item_sk = 17975
# ss_customer_sk = 90632
# d_month_seq = 2421
# numRows(store_sales) = 2880404 & numRows(date_dim) = 73049
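# These NDVs drive the plan cardinalities: the final group by ss_item_sk yields
# ~17.98K groups, while the wider groupings are capped at the 2.88M input rows.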
with v1 as (
select ss_item_sk, ss_customer_sk, d_month_seq, count(*)
from tpcds.store_sales ss
inner join tpcds.date_dim dd on ss.ss_sold_date_sk = dd.d_date_sk
group by ss_item_sk, ss_customer_sk, d_month_seq),
v2 as (
select ss_item_sk, ss_customer_sk, count(*)
from v1 group by ss_item_sk, ss_customer_sk)
select ss_item_sk, count(*) from v2 group by ss_item_sk;
---- PLAN
PLAN-ROOT SINK
|
05:AGGREGATE [FINALIZE]
| output: count(*)
| group by: ss_item_sk
| row-size=16B cardinality=17.98K
|
04:AGGREGATE [FINALIZE]
| group by: ss_item_sk, ss_customer_sk
| row-size=12B cardinality=2.88M
|
03:AGGREGATE [FINALIZE]
| group by: ss_item_sk, ss_customer_sk, d_month_seq
| row-size=16B cardinality=2.88M
|
02:HASH JOIN [INNER JOIN]
| hash predicates: ss.ss_sold_date_sk = dd.d_date_sk
| runtime filters: RF000 <- dd.d_date_sk
| row-size=24B cardinality=2.88M
|
|--01:SCAN HDFS [tpcds.date_dim dd]
| HDFS partitions=1/1 files=1 size=9.84MB
| row-size=8B cardinality=73.05K
|
00:SCAN HDFS [tpcds.store_sales ss]
HDFS partitions=1824/1824 files=1824 size=346.60MB
runtime filters: RF000 -> ss.ss_sold_date_sk
row-size=16B cardinality=2.88M
---- PARALLELPLANS
PLAN-ROOT SINK
|
13:EXCHANGE [UNPARTITIONED]
|
12:AGGREGATE [FINALIZE]
| output: count:merge(*)
| group by: ss_item_sk
| row-size=16B cardinality=17.98K
|
11:EXCHANGE [HASH(ss_item_sk)]
|
05:AGGREGATE [STREAMING]
| output: count(*)
| group by: ss_item_sk
| row-size=16B cardinality=107.85K
|
10:AGGREGATE [FINALIZE]
| group by: ss_item_sk, ss_customer_sk
| row-size=12B cardinality=2.88M
|
09:EXCHANGE [HASH(ss_item_sk,ss_customer_sk)]
|
04:AGGREGATE [STREAMING]
| group by: ss_item_sk, ss_customer_sk
| row-size=12B cardinality=2.88M
|
08:AGGREGATE [FINALIZE]
| group by: ss_item_sk, ss_customer_sk, d_month_seq
| row-size=16B cardinality=2.88M
|
07:EXCHANGE [HASH(ss_item_sk,ss_customer_sk,d_month_seq)]
|
03:AGGREGATE [STREAMING]
| group by: ss_item_sk, ss_customer_sk, d_month_seq
| row-size=16B cardinality=2.88M
|
02:HASH JOIN [INNER JOIN, BROADCAST]
| hash predicates: ss.ss_sold_date_sk = dd.d_date_sk
| row-size=24B cardinality=2.88M
|
|--JOIN BUILD
| | join-table-id=00 plan-id=01 cohort-id=01
| | build expressions: dd.d_date_sk
| | runtime filters: RF000 <- dd.d_date_sk
| |
| 06:EXCHANGE [BROADCAST]
| |
| 01:SCAN HDFS [tpcds.date_dim dd]
| HDFS partitions=1/1 files=1 size=9.84MB
| row-size=8B cardinality=73.05K
|
00:SCAN HDFS [tpcds.store_sales ss]
HDFS partitions=1824/1824 files=1824 size=346.60MB
runtime filters: RF000 -> ss.ss_sold_date_sk
row-size=16B cardinality=2.88M
====
# Aggregation across nested views where the grouping columns do not change.
# The NDVs of the aggregation columns are as follows:
# c_nationkey = 25
# c_custkey = 150000
with v1 as (
select c_nationkey, c_custkey, count(*)
from tpch.customer
group by c_nationkey, c_custkey),
v2 as (
select c_nationkey, c_custkey, count(*)
from v1, tpch.orders
where c_custkey = o_custkey
group by c_nationkey, c_custkey)
select c_nationkey, count(*) from v2 group by c_nationkey;
---- PLAN
PLAN-ROOT SINK
|
05:AGGREGATE [FINALIZE]
| output: count(*)
| group by: c_nationkey
| row-size=10B cardinality=25
|
04:AGGREGATE [FINALIZE]
| group by: c_nationkey, c_custkey
| row-size=10B cardinality=150.00K
|
03:HASH JOIN [INNER JOIN]
| hash predicates: o_custkey = c_custkey
| runtime filters: RF000 <- c_custkey
| row-size=18B cardinality=1.50M
|
|--01:AGGREGATE [FINALIZE]
| | group by: c_nationkey, c_custkey
| | row-size=10B cardinality=150.00K
| |
| 00:SCAN HDFS [tpch.customer]
| HDFS partitions=1/1 files=1 size=23.08MB
| row-size=10B cardinality=150.00K
|
02:SCAN HDFS [tpch.orders]
HDFS partitions=1/1 files=1 size=162.56MB
runtime filters: RF000 -> o_custkey
row-size=8B cardinality=1.50M
---- PARALLELPLANS
PLAN-ROOT SINK
|
13:EXCHANGE [UNPARTITIONED]
|
12:AGGREGATE [FINALIZE]
| output: count:merge(*)
| group by: c_nationkey
| row-size=10B cardinality=25
|
11:EXCHANGE [HASH(c_nationkey)]
|
05:AGGREGATE [STREAMING]
| output: count(*)
| group by: c_nationkey
| row-size=10B cardinality=50
|
10:AGGREGATE [FINALIZE]
| group by: c_nationkey, c_custkey
| row-size=10B cardinality=150.00K
|
09:EXCHANGE [HASH(c_nationkey,c_custkey)]
|
04:AGGREGATE [STREAMING]
| group by: c_nationkey, c_custkey
| row-size=10B cardinality=297.98K
|
03:HASH JOIN [INNER JOIN, BROADCAST]
| hash predicates: o_custkey = c_custkey
| row-size=18B cardinality=1.50M
|
|--JOIN BUILD
| | join-table-id=00 plan-id=01 cohort-id=01
| | build expressions: c_custkey
| | runtime filters: RF000 <- c_custkey
| |
| 08:EXCHANGE [BROADCAST]
| |
| 07:AGGREGATE [FINALIZE]
| | group by: c_nationkey, c_custkey
| | row-size=10B cardinality=150.00K
| |
| 06:EXCHANGE [HASH(c_nationkey,c_custkey)]
| |
| 01:AGGREGATE [STREAMING]
| | group by: c_nationkey, c_custkey
| | row-size=10B cardinality=150.00K
| |
| 00:SCAN HDFS [tpch.customer]
| HDFS partitions=1/1 files=1 size=23.08MB
| row-size=10B cardinality=150.00K
|
02:SCAN HDFS [tpch.orders]
HDFS partitions=1/1 files=1 size=162.56MB
runtime filters: RF000 -> o_custkey
row-size=8B cardinality=1.50M
====
# Aggregation across nested views where the grouping columns do not change,
# and there is a UnionNode in the plan tree.
# The NDVs of the grouping columns are as follows:
# c_nationkey = 25
# c_custkey = 150000
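# The UNION ALL doubles the customer input to 300.00K rows, so the expected
# distinct-count estimates for (c_nationkey, c_custkey) are capped at 300.00K
# here; the final group by c_nationkey is still bounded by the NDV of 25.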
with v0 as (
select * from tpch.customer
union all
select * from tpch.customer
),
v1 as (
select c_nationkey, c_custkey, count(*)
from v0
group by c_nationkey, c_custkey),
v2 as (
select c_nationkey, c_custkey, count(*)
from v1, tpch.orders
where c_custkey = o_custkey
group by c_nationkey, c_custkey)
select c_nationkey, count(*) from v2 group by c_nationkey;
---- PLAN
PLAN-ROOT SINK
|
07:AGGREGATE [FINALIZE]
| output: count(*)
| group by: c_nationkey
| row-size=10B cardinality=25
|
06:AGGREGATE [FINALIZE]
| group by: c_nationkey, c_custkey
| row-size=10B cardinality=300.00K
|
05:HASH JOIN [INNER JOIN]
| hash predicates: o_custkey = c_custkey
| runtime filters: RF000 <- c_custkey
| row-size=18B cardinality=3.00M
|
|--03:AGGREGATE [FINALIZE]
| | group by: c_nationkey, c_custkey
| | row-size=10B cardinality=300.00K
| |
| 00:UNION
| | pass-through-operands: all
| | row-size=10B cardinality=300.00K
| |
| |--02:SCAN HDFS [tpch.customer]
| | HDFS partitions=1/1 files=1 size=23.08MB
| | row-size=10B cardinality=150.00K
| |
| 01:SCAN HDFS [tpch.customer]
| HDFS partitions=1/1 files=1 size=23.08MB
| row-size=10B cardinality=150.00K
|
04:SCAN HDFS [tpch.orders]
HDFS partitions=1/1 files=1 size=162.56MB
runtime filters: RF000 -> o_custkey
row-size=8B cardinality=1.50M
---- PARALLELPLANS
PLAN-ROOT SINK
|
15:EXCHANGE [UNPARTITIONED]
|
14:AGGREGATE [FINALIZE]
| output: count:merge(*)
| group by: c_nationkey
| row-size=10B cardinality=25
|
13:EXCHANGE [HASH(c_nationkey)]
|
07:AGGREGATE [STREAMING]
| output: count(*)
| group by: c_nationkey
| row-size=10B cardinality=50
|
12:AGGREGATE [FINALIZE]
| group by: c_nationkey, c_custkey
| row-size=10B cardinality=300.00K
|
11:EXCHANGE [HASH(c_nationkey,c_custkey)]
|
06:AGGREGATE [STREAMING]
| group by: c_nationkey, c_custkey
| row-size=10B cardinality=595.96K
|
05:HASH JOIN [INNER JOIN, BROADCAST]
| hash predicates: o_custkey = c_custkey
| row-size=18B cardinality=3.00M
|
|--JOIN BUILD
| | join-table-id=00 plan-id=01 cohort-id=01
| | build expressions: c_custkey
| | runtime filters: RF000 <- c_custkey
| |
| 10:EXCHANGE [BROADCAST]
| |
| 09:AGGREGATE [FINALIZE]
| | group by: c_nationkey, c_custkey
| | row-size=10B cardinality=300.00K
| |
| 08:EXCHANGE [HASH(c_nationkey,c_custkey)]
| |
| 03:AGGREGATE [STREAMING]
| | group by: c_nationkey, c_custkey
| | row-size=10B cardinality=300.00K
| |
| 00:UNION
| | pass-through-operands: all
| | row-size=10B cardinality=300.00K
| |
| |--02:SCAN HDFS [tpch.customer]
| | HDFS partitions=1/1 files=1 size=23.08MB
| | row-size=10B cardinality=150.00K
| |
| 01:SCAN HDFS [tpch.customer]
| HDFS partitions=1/1 files=1 size=23.08MB
| row-size=10B cardinality=150.00K
|
04:SCAN HDFS [tpch.orders]
HDFS partitions=1/1 files=1 size=162.56MB
runtime filters: RF000 -> o_custkey
row-size=8B cardinality=1.50M
====
# Aggregation across nested views where the grouping columns do not change,
# and there is a UnionNode with a limit in the plan tree.
# The NDVs of the grouping columns are as follows:
# c_nationkey = 25
# c_custkey = 150000
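# The limit 100 on v0 bounds every downstream grouping estimate: the
# (c_nationkey, c_custkey) aggregations are expected at cardinality=100, and
# the join estimate shrinks accordingly (1.52K instead of millions).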
with v0 as (
select * from (
select * from tpch.customer
union all
select * from tpch.customer
) u limit 100
),
v1 as (
select c_nationkey, c_custkey, count(*)
from v0
group by c_nationkey, c_custkey),
v2 as (
select c_nationkey, c_custkey, count(*)
from v1, tpch.orders
where c_custkey = o_custkey
group by c_nationkey, c_custkey)
select c_nationkey, count(*) from v2 group by c_nationkey;
---- PLAN
PLAN-ROOT SINK
|
07:AGGREGATE [FINALIZE]
| output: count(*)
| group by: c_nationkey
| row-size=10B cardinality=25
|
06:AGGREGATE [FINALIZE]
| group by: c_nationkey, c_custkey
| row-size=10B cardinality=100
|
05:HASH JOIN [INNER JOIN]
| hash predicates: o_custkey = c_custkey
| runtime filters: RF000 <- c_custkey
| row-size=18B cardinality=1.52K
|
|--03:AGGREGATE [FINALIZE]
| | group by: c_nationkey, c_custkey
| | row-size=10B cardinality=100
| |
| 00:UNION
| | pass-through-operands: all
| | limit: 100
| | row-size=10B cardinality=100
| |
| |--02:SCAN HDFS [tpch.customer]
| | HDFS partitions=1/1 files=1 size=23.08MB
| | row-size=10B cardinality=150.00K
| |
| 01:SCAN HDFS [tpch.customer]
| HDFS partitions=1/1 files=1 size=23.08MB
| row-size=10B cardinality=150.00K
|
04:SCAN HDFS [tpch.orders]
HDFS partitions=1/1 files=1 size=162.56MB
runtime filters: RF000 -> o_custkey
row-size=8B cardinality=1.50M
---- PARALLELPLANS
PLAN-ROOT SINK
|
14:EXCHANGE [UNPARTITIONED]
|
13:AGGREGATE [FINALIZE]
| output: count:merge(*)
| group by: c_nationkey
| row-size=10B cardinality=25
|
12:EXCHANGE [HASH(c_nationkey)]
|
07:AGGREGATE [STREAMING]
| output: count(*)
| group by: c_nationkey
| row-size=10B cardinality=44
|
11:AGGREGATE [FINALIZE]
| group by: c_nationkey, c_custkey
| row-size=10B cardinality=100
|
10:EXCHANGE [HASH(c_nationkey,c_custkey)]
|
06:AGGREGATE [STREAMING]
| group by: c_nationkey, c_custkey
| row-size=10B cardinality=200
|
05:HASH JOIN [INNER JOIN, BROADCAST]
| hash predicates: o_custkey = c_custkey
| row-size=18B cardinality=1.52K
|
|--JOIN BUILD
| | join-table-id=00 plan-id=01 cohort-id=01
| | build expressions: c_custkey
| | runtime filters: RF000 <- c_custkey
| |
| 09:EXCHANGE [BROADCAST]
| |
| 03:AGGREGATE [FINALIZE]
| | group by: c_nationkey, c_custkey
| | row-size=10B cardinality=100
| |
| 08:EXCHANGE [UNPARTITIONED]
| | limit: 100
| |
| 00:UNION
| | pass-through-operands: all
| | limit: 100
| | row-size=10B cardinality=100
| |
| |--02:SCAN HDFS [tpch.customer]
| | HDFS partitions=1/1 files=1 size=23.08MB
| | row-size=10B cardinality=150.00K
| |
| 01:SCAN HDFS [tpch.customer]
| HDFS partitions=1/1 files=1 size=23.08MB
| row-size=10B cardinality=150.00K
|
04:SCAN HDFS [tpch.orders]
HDFS partitions=1/1 files=1 size=162.56MB
runtime filters: RF000 -> o_custkey
row-size=8B cardinality=1.50M
====
# Test grouping with an equality stats predicate on the grouping expression.
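# With ss_customer_sk = 1 only a single distinct grouping value can survive
# the scan, so the final aggregation is expected at cardinality=1 even though
# the scan itself is estimated at 30 matching rows.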
select ss_customer_sk, count(*)
from tpcds_parquet.store_sales
where ss_customer_sk = 1
group by ss_customer_sk;
---- PLAN
PLAN-ROOT SINK
|
01:AGGREGATE [FINALIZE]
| output: count(*)
| group by: ss_customer_sk
| row-size=12B cardinality=1
|
00:SCAN HDFS [tpcds_parquet.store_sales]
HDFS partitions=1824/1824 files=1824 size=200.96MB
predicates: ss_customer_sk = 1
row-size=4B cardinality=30
---- PARALLELPLANS
PLAN-ROOT SINK
|
04:EXCHANGE [UNPARTITIONED]
|
03:AGGREGATE [FINALIZE]
| output: count:merge(*)
| group by: ss_customer_sk
| row-size=12B cardinality=1
|
02:EXCHANGE [HASH(ss_customer_sk)]
|
01:AGGREGATE [STREAMING]
| output: count(*)
| group by: ss_customer_sk
| row-size=12B cardinality=6
|
00:SCAN HDFS [tpcds_parquet.store_sales]
HDFS partitions=1824/1824 files=1824 size=200.96MB
predicates: ss_customer_sk = 1
row-size=4B cardinality=30
====
# Test grouping with an in-list stats predicate on the grouping expression.
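# The IN list contains 5 distinct values, so the grouping-column NDV, and
# with it the final aggregation's cardinality estimate, is reduced to 5.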
select ss_customer_sk, count(*)
from tpcds_parquet.store_sales
where ss_customer_sk in (1, 2, 3, 4, 5)
group by ss_customer_sk;
---- PLAN
PLAN-ROOT SINK
|
01:AGGREGATE [FINALIZE]
| output: count(*)
| group by: ss_customer_sk
| row-size=12B cardinality=5
|
00:SCAN HDFS [tpcds_parquet.store_sales]
HDFS partitions=1824/1824 files=1824 size=200.96MB
predicates: ss_customer_sk IN (1, 2, 3, 4, 5)
row-size=4B cardinality=159
---- PARALLELPLANS
PLAN-ROOT SINK
|
04:EXCHANGE [UNPARTITIONED]
|
03:AGGREGATE [FINALIZE]
| output: count:merge(*)
| group by: ss_customer_sk
| row-size=12B cardinality=5
|
02:EXCHANGE [HASH(ss_customer_sk)]
|
01:AGGREGATE [STREAMING]
| output: count(*)
| group by: ss_customer_sk
| row-size=12B cardinality=30
|
00:SCAN HDFS [tpcds_parquet.store_sales]
HDFS partitions=1824/1824 files=1824 size=200.96MB
predicates: ss_customer_sk IN (1, 2, 3, 4, 5)
row-size=4B cardinality=159
====
# Test grouping with an is-null stats predicate on the grouping expression.
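# Under the IS NULL predicate the grouping column can only take the value
# NULL, so the aggregation is expected at cardinality=1 even though the scan
# cardinality is unavailable.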
select ss_customer_sk, count(*)
from tpcds_orc_def.store_sales
where ss_customer_sk is null
group by ss_customer_sk;
---- PLAN
PLAN-ROOT SINK
|
01:AGGREGATE [FINALIZE]
| output: count(*)
| group by: ss_customer_sk
| row-size=12B cardinality=1
|
00:SCAN HDFS [tpcds_orc_def.store_sales]
HDFS partitions=1824/1824 files=1824 size=101.97MB
predicates: ss_customer_sk IS NULL
row-size=4B cardinality=unavailable
---- PARALLELPLANS
PLAN-ROOT SINK
|
04:EXCHANGE [UNPARTITIONED]
|
03:AGGREGATE [FINALIZE]
| output: count:merge(*)
| group by: ss_customer_sk
| row-size=12B cardinality=1
|
02:EXCHANGE [HASH(ss_customer_sk)]
|
01:AGGREGATE [STREAMING]
| output: count(*)
| group by: ss_customer_sk
| row-size=12B cardinality=1
|
00:SCAN HDFS [tpcds_orc_def.store_sales]
HDFS partitions=1824/1824 files=1824 size=101.97MB
predicates: ss_customer_sk IS NULL
row-size=4B cardinality=unavailable
====