# A subset of aggregation classes are materialized. No group by.
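# In the expected plans below, each surviving distinct expr gets its own aggregation
# class (Class 0/1) and the non-distinct aggs share one class (Class 2); a final
# "transpose" aggregation combines the per-class rows into a single result row using
# aggif() and valid_tid().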
select a, c, e1, e2, e3 from
(select count(distinct tinyint_col) a, avg(distinct smallint_col) b,
count(distinct int_col) c, avg(distinct bigint_col) d,
min(float_col) e1, max(float_col) e2, sum(double_col) e3
from functional.alltypes) v
---- PLAN
PLAN-ROOT SINK
|
03:AGGREGATE [FINALIZE]
| output: aggif(valid_tid(2,5,7,10,11) = 2, count(tinyint_col)), aggif(valid_tid(2,5,7,10,11) = 7, count(int_col)), aggif(valid_tid(2,5,7,10,11) = 11, min(float_col)), aggif(valid_tid(2,5,7,10,11) = 11, max(float_col)), aggif(valid_tid(2,5,7,10,11) = 11, sum(double_col))
| row-size=32B cardinality=1
|
02:AGGREGATE [FINALIZE]
| Class 0
| output: count(tinyint_col)
| Class 1
| output: count(int_col)
| Class 2
| output: min:merge(float_col), max:merge(float_col), sum:merge(double_col)
| row-size=32B cardinality=3
|
01:AGGREGATE
| Class 0
| group by: tinyint_col
| Class 1
| group by: int_col
| Class 2
| output: min(float_col), max(float_col), sum(double_col)
| row-size=21B cardinality=21
|
00:SCAN HDFS [functional.alltypes]
HDFS partitions=24/24 files=24 size=478.45KB
row-size=17B cardinality=7.30K
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
03:AGGREGATE [FINALIZE]
| output: aggif(valid_tid(2,5,7,10,11) = 2, count(tinyint_col)), aggif(valid_tid(2,5,7,10,11) = 7, count(int_col)), aggif(valid_tid(2,5,7,10,11) = 11, min(float_col)), aggif(valid_tid(2,5,7,10,11) = 11, max(float_col)), aggif(valid_tid(2,5,7,10,11) = 11, sum(double_col))
| row-size=32B cardinality=1
|
07:AGGREGATE [FINALIZE]
| Class 0
| output: count:merge(tinyint_col)
| Class 1
| output: count:merge(int_col)
| Class 2
| output: min:merge(float_col), max:merge(float_col), sum:merge(double_col)
| row-size=32B cardinality=3
|
06:EXCHANGE [UNPARTITIONED]
|
02:AGGREGATE
| Class 0
| output: count(tinyint_col)
| Class 1
| output: count(int_col)
| Class 2
| output: min:merge(float_col), max:merge(float_col), sum:merge(double_col)
| row-size=32B cardinality=3
|
05:AGGREGATE
| Class 0
| group by: tinyint_col
| Class 1
| group by: int_col
| Class 2
| output: min:merge(float_col), max:merge(float_col), sum:merge(double_col)
| row-size=21B cardinality=21
|
04:EXCHANGE [HASH(CASE valid_tid(1,6,11) WHEN 1 THEN murmur_hash(tinyint_col) WHEN 6 THEN murmur_hash(int_col) WHEN 11 THEN 0 END)]
|
01:AGGREGATE [STREAMING]
| Class 0
| group by: tinyint_col
| Class 1
| group by: int_col
| Class 2
| output: min(float_col), max(float_col), sum(double_col)
| row-size=21B cardinality=21
|
00:SCAN HDFS [functional.alltypes]
HDFS partitions=24/24 files=24 size=478.45KB
row-size=17B cardinality=7.30K
====
# A subset of aggregation classes are materialized. A subset of aggregation functions
# of the surviving classes are materialized. No group by.
select a1, c2, e2 from
(select count(distinct tinyint_col) a1, avg(distinct tinyint_col) a2,
count(distinct smallint_col) b1, avg(distinct smallint_col) b2,
count(distinct int_col) c1, avg(distinct int_col) c2,
count(distinct bigint_col) d1, avg(distinct bigint_col) d2,
min(float_col) e1, max(float_col) e2, sum(double_col) e3
from functional.alltypes) v
---- PLAN
PLAN-ROOT SINK
|
03:AGGREGATE [FINALIZE]
| output: aggif(valid_tid(3,6,9,12,13) = 3, count(tinyint_col)), aggif(valid_tid(3,6,9,12,13) = 9, avg(int_col)), aggif(valid_tid(3,6,9,12,13) = 13, max(float_col))
| row-size=20B cardinality=1
|
02:AGGREGATE [FINALIZE]
| Class 0
| output: count(tinyint_col)
| Class 1
| output: avg(int_col)
| Class 2
| output: max:merge(float_col)
| row-size=20B cardinality=3
|
01:AGGREGATE
| Class 0
| group by: tinyint_col
| Class 1
| group by: int_col
| Class 2
| output: max(float_col)
| row-size=9B cardinality=21
|
00:SCAN HDFS [functional.alltypes]
HDFS partitions=24/24 files=24 size=478.45KB
row-size=9B cardinality=7.30K
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
03:AGGREGATE [FINALIZE]
| output: aggif(valid_tid(3,6,9,12,13) = 3, count(tinyint_col)), aggif(valid_tid(3,6,9,12,13) = 9, avg(int_col)), aggif(valid_tid(3,6,9,12,13) = 13, max(float_col))
| row-size=20B cardinality=1
|
07:AGGREGATE [FINALIZE]
| Class 0
| output: count:merge(tinyint_col)
| Class 1
| output: avg:merge(int_col)
| Class 2
| output: max:merge(float_col)
| row-size=20B cardinality=3
|
06:EXCHANGE [UNPARTITIONED]
|
02:AGGREGATE
| Class 0
| output: count(tinyint_col)
| Class 1
| output: avg(int_col)
| Class 2
| output: max:merge(float_col)
| row-size=20B cardinality=3
|
05:AGGREGATE
| Class 0
| group by: tinyint_col
| Class 1
| group by: int_col
| Class 2
| output: max:merge(float_col)
| row-size=9B cardinality=21
|
04:EXCHANGE [HASH(CASE valid_tid(1,7,13) WHEN 1 THEN murmur_hash(tinyint_col) WHEN 7 THEN murmur_hash(int_col) WHEN 13 THEN 0 END)]
|
01:AGGREGATE [STREAMING]
| Class 0
| group by: tinyint_col
| Class 1
| group by: int_col
| Class 2
| output: max(float_col)
| row-size=9B cardinality=21
|
00:SCAN HDFS [functional.alltypes]
HDFS partitions=24/24 files=24 size=478.45KB
row-size=9B cardinality=7.30K
====
# A subset of aggregation classes are materialized. With group by.
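# With a group by, the grouping exprs are repeated in every aggregation class, and the
# final transpose aggregation groups by CASE valid_tid(...) exprs that pick the
# class-specific copy of each grouping column.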
select a, c, e1, e2, e3, gby2 from
(select count(distinct tinyint_col) a, avg(distinct smallint_col) b,
count(distinct int_col) c, avg(distinct bigint_col) d,
min(float_col) e1, max(float_col) e2, sum(double_col) e3,
string_col gby1, date_string_col gby2
from functional.alltypes
group by gby1, gby2) v
---- PLAN
PLAN-ROOT SINK
|
03:AGGREGATE [FINALIZE]
| output: aggif(valid_tid(2,5,7,10,11) = 2, count(tinyint_col)), aggif(valid_tid(2,5,7,10,11) = 7, count(int_col)), aggif(valid_tid(2,5,7,10,11) = 11, min(float_col)), aggif(valid_tid(2,5,7,10,11) = 11, max(float_col)), aggif(valid_tid(2,5,7,10,11) = 11, sum(double_col))
| group by: CASE valid_tid(2,7,11) WHEN 2 THEN string_col WHEN 7 THEN string_col WHEN 11 THEN string_col END, CASE valid_tid(2,7,11) WHEN 2 THEN date_string_col WHEN 7 THEN date_string_col WHEN 11 THEN date_string_col END
| row-size=56B cardinality=8.11K
|
02:AGGREGATE [FINALIZE]
| Class 0
| output: count(tinyint_col)
| group by: string_col, date_string_col
| Class 1
| output: count(int_col)
| group by: string_col, date_string_col
| Class 2
| output: min:merge(float_col), max:merge(float_col), sum:merge(double_col)
| group by: string_col, date_string_col
| row-size=131B cardinality=22.08K
|
01:AGGREGATE
| Class 0
| group by: string_col, date_string_col, tinyint_col
| Class 1
| group by: string_col, date_string_col, int_col
| Class 2
| output: min(float_col), max(float_col), sum(double_col)
| group by: string_col, date_string_col
| row-size=120B cardinality=21.90K
|
00:SCAN HDFS [functional.alltypes]
HDFS partitions=24/24 files=24 size=478.45KB
row-size=50B cardinality=7.30K
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
08:EXCHANGE [UNPARTITIONED]
|
03:AGGREGATE [FINALIZE]
| output: aggif(valid_tid(2,5,7,10,11) = 2, count(tinyint_col)), aggif(valid_tid(2,5,7,10,11) = 7, count(int_col)), aggif(valid_tid(2,5,7,10,11) = 11, min(float_col)), aggif(valid_tid(2,5,7,10,11) = 11, max(float_col)), aggif(valid_tid(2,5,7,10,11) = 11, sum(double_col))
| group by: CASE valid_tid(2,7,11) WHEN 2 THEN string_col WHEN 7 THEN string_col WHEN 11 THEN string_col END, CASE valid_tid(2,7,11) WHEN 2 THEN date_string_col WHEN 7 THEN date_string_col WHEN 11 THEN date_string_col END
| row-size=56B cardinality=8.11K
|
07:AGGREGATE [FINALIZE]
| Class 0
| output: count:merge(tinyint_col)
| group by: string_col, date_string_col
| Class 1
| output: count:merge(int_col)
| group by: string_col, date_string_col
| Class 2
| output: min:merge(float_col), max:merge(float_col), sum:merge(double_col)
| group by: string_col, date_string_col
| row-size=131B cardinality=22.08K
|
06:EXCHANGE [HASH(CASE valid_tid(2,7,11) WHEN 2 THEN murmur_hash(string_col) WHEN 7 THEN murmur_hash(string_col) WHEN 11 THEN murmur_hash(string_col) END,CASE valid_tid(2,7,11) WHEN 2 THEN murmur_hash(date_string_col) WHEN 7 THEN murmur_hash(date_string_col) WHEN 11 THEN murmur_hash(date_string_col) END)]
|
02:AGGREGATE [STREAMING]
| Class 0
| output: count(tinyint_col)
| group by: string_col, date_string_col
| Class 1
| output: count(int_col)
| group by: string_col, date_string_col
| Class 2
| output: min:merge(float_col), max:merge(float_col), sum:merge(double_col)
| group by: string_col, date_string_col
| row-size=131B cardinality=22.08K
|
05:AGGREGATE
| Class 0
| group by: string_col, date_string_col, tinyint_col
| Class 1
| group by: string_col, date_string_col, int_col
| Class 2
| output: min:merge(float_col), max:merge(float_col), sum:merge(double_col)
| group by: string_col, date_string_col
| row-size=120B cardinality=21.90K
|
04:EXCHANGE [HASH(CASE valid_tid(1,6,11) WHEN 1 THEN murmur_hash(string_col) WHEN 6 THEN murmur_hash(string_col) WHEN 11 THEN murmur_hash(string_col) END,CASE valid_tid(1,6,11) WHEN 1 THEN murmur_hash(date_string_col) WHEN 6 THEN murmur_hash(date_string_col) WHEN 11 THEN murmur_hash(date_string_col) END,CASE valid_tid(1,6,11) WHEN 1 THEN murmur_hash(tinyint_col) WHEN 6 THEN murmur_hash(int_col) WHEN 11 THEN 0 END)]
|
01:AGGREGATE [STREAMING]
| Class 0
| group by: string_col, date_string_col, tinyint_col
| Class 1
| group by: string_col, date_string_col, int_col
| Class 2
| output: min(float_col), max(float_col), sum(double_col)
| group by: string_col, date_string_col
| row-size=120B cardinality=21.90K
|
00:SCAN HDFS [functional.alltypes]
HDFS partitions=24/24 files=24 size=478.45KB
row-size=50B cardinality=7.30K
====
# A subset of aggregation classes are materialized. A subset of aggregation functions
# of the surviving classes are materialized. With group by.
select a1, c2, e2, gby1 from
(select count(distinct tinyint_col) a1, avg(distinct tinyint_col) a2,
count(distinct smallint_col) b1, avg(distinct smallint_col) b2,
count(distinct int_col) c1, avg(distinct int_col) c2,
count(distinct bigint_col) d1, avg(distinct bigint_col) d2,
min(float_col) e1, max(float_col) e2, sum(double_col) e3,
string_col gby1, date_string_col gby2
from functional.alltypes
group by gby1, gby2) v
---- PLAN
PLAN-ROOT SINK
|
03:AGGREGATE [FINALIZE]
| output: aggif(valid_tid(3,6,9,12,13) = 3, count(tinyint_col)), aggif(valid_tid(3,6,9,12,13) = 9, avg(int_col)), aggif(valid_tid(3,6,9,12,13) = 13, max(float_col))
| group by: CASE valid_tid(3,9,13) WHEN 3 THEN string_col WHEN 9 THEN string_col WHEN 13 THEN string_col END, CASE valid_tid(3,9,13) WHEN 3 THEN date_string_col WHEN 9 THEN date_string_col WHEN 13 THEN date_string_col END
| row-size=44B cardinality=8.11K
|
02:AGGREGATE [FINALIZE]
| Class 0
| output: count(tinyint_col)
| group by: string_col, date_string_col
| Class 1
| output: avg(int_col)
| group by: string_col, date_string_col
| Class 2
| output: max:merge(float_col)
| group by: string_col, date_string_col
| row-size=119B cardinality=22.08K
|
01:AGGREGATE
| Class 0
| group by: string_col, date_string_col, tinyint_col
| Class 1
| group by: string_col, date_string_col, int_col
| Class 2
| output: max(float_col)
| group by: string_col, date_string_col
| row-size=108B cardinality=21.90K
|
00:SCAN HDFS [functional.alltypes]
HDFS partitions=24/24 files=24 size=478.45KB
row-size=42B cardinality=7.30K
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
08:EXCHANGE [UNPARTITIONED]
|
03:AGGREGATE [FINALIZE]
| output: aggif(valid_tid(3,6,9,12,13) = 3, count(tinyint_col)), aggif(valid_tid(3,6,9,12,13) = 9, avg(int_col)), aggif(valid_tid(3,6,9,12,13) = 13, max(float_col))
| group by: CASE valid_tid(3,9,13) WHEN 3 THEN string_col WHEN 9 THEN string_col WHEN 13 THEN string_col END, CASE valid_tid(3,9,13) WHEN 3 THEN date_string_col WHEN 9 THEN date_string_col WHEN 13 THEN date_string_col END
| row-size=44B cardinality=8.11K
|
07:AGGREGATE [FINALIZE]
| Class 0
| output: count:merge(tinyint_col)
| group by: string_col, date_string_col
| Class 1
| output: avg:merge(int_col)
| group by: string_col, date_string_col
| Class 2
| output: max:merge(float_col)
| group by: string_col, date_string_col
| row-size=119B cardinality=22.08K
|
06:EXCHANGE [HASH(CASE valid_tid(2,8,13) WHEN 2 THEN murmur_hash(string_col) WHEN 8 THEN murmur_hash(string_col) WHEN 13 THEN murmur_hash(string_col) END,CASE valid_tid(2,8,13) WHEN 2 THEN murmur_hash(date_string_col) WHEN 8 THEN murmur_hash(date_string_col) WHEN 13 THEN murmur_hash(date_string_col) END)]
|
02:AGGREGATE [STREAMING]
| Class 0
| output: count(tinyint_col)
| group by: string_col, date_string_col
| Class 1
| output: avg(int_col)
| group by: string_col, date_string_col
| Class 2
| output: max:merge(float_col)
| group by: string_col, date_string_col
| row-size=119B cardinality=22.08K
|
05:AGGREGATE
| Class 0
| group by: string_col, date_string_col, tinyint_col
| Class 1
| group by: string_col, date_string_col, int_col
| Class 2
| output: max:merge(float_col)
| group by: string_col, date_string_col
| row-size=108B cardinality=21.90K
|
04:EXCHANGE [HASH(CASE valid_tid(1,7,13) WHEN 1 THEN murmur_hash(string_col) WHEN 7 THEN murmur_hash(string_col) WHEN 13 THEN murmur_hash(string_col) END,CASE valid_tid(1,7,13) WHEN 1 THEN murmur_hash(date_string_col) WHEN 7 THEN murmur_hash(date_string_col) WHEN 13 THEN murmur_hash(date_string_col) END,CASE valid_tid(1,7,13) WHEN 1 THEN murmur_hash(tinyint_col) WHEN 7 THEN murmur_hash(int_col) WHEN 13 THEN 0 END)]
|
01:AGGREGATE [STREAMING]
| Class 0
| group by: string_col, date_string_col, tinyint_col
| Class 1
| group by: string_col, date_string_col, int_col
| Class 2
| output: max(float_col)
| group by: string_col, date_string_col
| row-size=108B cardinality=21.90K
|
00:SCAN HDFS [functional.alltypes]
HDFS partitions=24/24 files=24 size=478.45KB
row-size=42B cardinality=7.30K
====
# Simplifies to a single aggregation class. Only first distinct agg is materialized.
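# With a single surviving class there is no transpose step; the plan is the usual
# two-phase distinct aggregation (group by tinyint_col, then count the groups).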
select a from
(select count(distinct tinyint_col) a, count(distinct smallint_col) b
from functional.alltypes) v
---- PLAN
PLAN-ROOT SINK
|
02:AGGREGATE [FINALIZE]
| output: count(tinyint_col)
| row-size=8B cardinality=1
|
01:AGGREGATE
| group by: tinyint_col
| row-size=1B cardinality=10
|
00:SCAN HDFS [functional.alltypes]
HDFS partitions=24/24 files=24 size=478.45KB
row-size=1B cardinality=7.30K
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
07:AGGREGATE [FINALIZE]
| output: count:merge(tinyint_col)
| row-size=8B cardinality=1
|
06:EXCHANGE [UNPARTITIONED]
|
02:AGGREGATE
| output: count(tinyint_col)
| row-size=8B cardinality=1
|
05:AGGREGATE
| group by: tinyint_col
| row-size=1B cardinality=10
|
04:EXCHANGE [HASH(tinyint_col)]
|
01:AGGREGATE [STREAMING]
| group by: tinyint_col
| row-size=1B cardinality=10
|
00:SCAN HDFS [functional.alltypes]
HDFS partitions=24/24 files=24 size=478.45KB
row-size=1B cardinality=7.30K
====
# Simplifies to a single aggregation class. Only second distinct agg is materialized.
select b from
(select count(distinct tinyint_col) a, count(distinct smallint_col) b
from functional.alltypes) v
---- PLAN
PLAN-ROOT SINK
|
02:AGGREGATE [FINALIZE]
| output: count(smallint_col)
| row-size=8B cardinality=1
|
01:AGGREGATE
| group by: smallint_col
| row-size=2B cardinality=10
|
00:SCAN HDFS [functional.alltypes]
HDFS partitions=24/24 files=24 size=478.45KB
row-size=2B cardinality=7.30K
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
07:AGGREGATE [FINALIZE]
| output: count:merge(smallint_col)
| row-size=8B cardinality=1
|
06:EXCHANGE [UNPARTITIONED]
|
02:AGGREGATE
| output: count(smallint_col)
| row-size=8B cardinality=1
|
05:AGGREGATE
| group by: smallint_col
| row-size=2B cardinality=10
|
04:EXCHANGE [HASH(smallint_col)]
|
01:AGGREGATE [STREAMING]
| group by: smallint_col
| row-size=2B cardinality=10
|
00:SCAN HDFS [functional.alltypes]
HDFS partitions=24/24 files=24 size=478.45KB
row-size=2B cardinality=7.30K
====
# Some aggs only referenced in HAVING clause.
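# Aggs referenced only in HAVING must still be materialized; the HAVING conjuncts are
# evaluated on the transpose aggregation as predicates over the aggif() outputs.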
select count(distinct tinyint_col), min(timestamp_col) from functional.alltypes
having count(distinct smallint_col) < 10 and max(date_string_col) = 'test'
---- PLAN
PLAN-ROOT SINK
|
03:AGGREGATE [FINALIZE]
| output: aggif(valid_tid(2,4,5) = 2, count(tinyint_col)), aggif(valid_tid(2,4,5) = 4, count(smallint_col)), aggif(valid_tid(2,4,5) = 5, min(timestamp_col)), aggif(valid_tid(2,4,5) = 5, max(date_string_col))
| having: aggif(valid_tid(2,4,5) = 4, count(smallint_col)) < 10, aggif(valid_tid(2,4,5) = 5, max(date_string_col)) = 'test'
| row-size=44B cardinality=0
|
02:AGGREGATE [FINALIZE]
| Class 0
| output: count(tinyint_col)
| Class 1
| output: count(smallint_col)
| Class 2
| output: min:merge(timestamp_col), max:merge(date_string_col)
| row-size=44B cardinality=3
|
01:AGGREGATE
| Class 0
| group by: tinyint_col
| Class 1
| group by: smallint_col
| Class 2
| output: min(timestamp_col), max(date_string_col)
| row-size=31B cardinality=21
|
00:SCAN HDFS [functional.alltypes]
HDFS partitions=24/24 files=24 size=478.45KB
row-size=39B cardinality=7.30K
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
03:AGGREGATE [FINALIZE]
| output: aggif(valid_tid(2,4,5) = 2, count(tinyint_col)), aggif(valid_tid(2,4,5) = 4, count(smallint_col)), aggif(valid_tid(2,4,5) = 5, min(timestamp_col)), aggif(valid_tid(2,4,5) = 5, max(date_string_col))
| having: aggif(valid_tid(2,4,5) = 4, count(smallint_col)) < 10, aggif(valid_tid(2,4,5) = 5, max(date_string_col)) = 'test'
| row-size=44B cardinality=0
|
07:AGGREGATE [FINALIZE]
| Class 0
| output: count:merge(tinyint_col)
| Class 1
| output: count:merge(smallint_col)
| Class 2
| output: min:merge(timestamp_col), max:merge(date_string_col)
| row-size=44B cardinality=3
|
06:EXCHANGE [UNPARTITIONED]
|
02:AGGREGATE
| Class 0
| output: count(tinyint_col)
| Class 1
| output: count(smallint_col)
| Class 2
| output: min:merge(timestamp_col), max:merge(date_string_col)
| row-size=44B cardinality=3
|
05:AGGREGATE
| Class 0
| group by: tinyint_col
| Class 1
| group by: smallint_col
| Class 2
| output: min:merge(timestamp_col), max:merge(date_string_col)
| row-size=31B cardinality=21
|
04:EXCHANGE [HASH(CASE valid_tid(1,3,5) WHEN 1 THEN murmur_hash(tinyint_col) WHEN 3 THEN murmur_hash(smallint_col) WHEN 5 THEN 0 END)]
|
01:AGGREGATE [STREAMING]
| Class 0
| group by: tinyint_col
| Class 1
| group by: smallint_col
| Class 2
| output: min(timestamp_col), max(date_string_col)
| row-size=31B cardinality=21
|
00:SCAN HDFS [functional.alltypes]
HDFS partitions=24/24 files=24 size=478.45KB
row-size=39B cardinality=7.30K
====
# Some aggs only referenced in ORDER BY clause.
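# Aggs referenced only in ORDER BY must still be materialized; the sort keys are the
# aggif() exprs produced by the transpose aggregation.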
select count(distinct tinyint_col), min(timestamp_col) from functional.alltypes
group by bigint_col
order by count(distinct smallint_col), max(date_string_col)
---- PLAN
PLAN-ROOT SINK
|
04:SORT
| order by: aggif(valid_tid(2,4,5) = 4, count(smallint_col)) ASC, aggif(valid_tid(2,4,5) = 5, max(date_string_col)) ASC
| row-size=44B cardinality=11
|
03:AGGREGATE [FINALIZE]
| output: aggif(valid_tid(2,4,5) = 2, count(tinyint_col)), aggif(valid_tid(2,4,5) = 4, count(smallint_col)), aggif(valid_tid(2,4,5) = 5, min(timestamp_col)), aggif(valid_tid(2,4,5) = 5, max(date_string_col))
| group by: CASE valid_tid(2,4,5) WHEN 2 THEN bigint_col WHEN 4 THEN bigint_col WHEN 5 THEN bigint_col END
| row-size=52B cardinality=11
|
02:AGGREGATE [FINALIZE]
| Class 0
| output: count(tinyint_col)
| group by: bigint_col
| Class 1
| output: count(smallint_col)
| group by: bigint_col
| Class 2
| output: min:merge(timestamp_col), max:merge(date_string_col)
| group by: bigint_col
| row-size=68B cardinality=30
|
01:AGGREGATE
| Class 0
| group by: bigint_col, tinyint_col
| Class 1
| group by: bigint_col, smallint_col
| Class 2
| output: min(timestamp_col), max(date_string_col)
| group by: bigint_col
| row-size=55B cardinality=210
|
00:SCAN HDFS [functional.alltypes]
HDFS partitions=24/24 files=24 size=478.45KB
row-size=47B cardinality=7.30K
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
09:MERGING-EXCHANGE [UNPARTITIONED]
| order by: aggif(valid_tid(2,4,5) = 4, count(smallint_col)) ASC, aggif(valid_tid(2,4,5) = 5, max(date_string_col)) ASC
|
04:SORT
| order by: aggif(valid_tid(2,4,5) = 4, count(smallint_col)) ASC, aggif(valid_tid(2,4,5) = 5, max(date_string_col)) ASC
| row-size=44B cardinality=11
|
03:AGGREGATE [FINALIZE]
| output: aggif(valid_tid(2,4,5) = 2, count(tinyint_col)), aggif(valid_tid(2,4,5) = 4, count(smallint_col)), aggif(valid_tid(2,4,5) = 5, min(timestamp_col)), aggif(valid_tid(2,4,5) = 5, max(date_string_col))
| group by: CASE valid_tid(2,4,5) WHEN 2 THEN bigint_col WHEN 4 THEN bigint_col WHEN 5 THEN bigint_col END
| row-size=52B cardinality=11
|
08:AGGREGATE [FINALIZE]
| Class 0
| output: count:merge(tinyint_col)
| group by: bigint_col
| Class 1
| output: count:merge(smallint_col)
| group by: bigint_col
| Class 2
| output: min:merge(timestamp_col), max:merge(date_string_col)
| group by: bigint_col
| row-size=68B cardinality=30
|
07:EXCHANGE [HASH(CASE valid_tid(2,4,5) WHEN 2 THEN murmur_hash(bigint_col) WHEN 4 THEN murmur_hash(bigint_col) WHEN 5 THEN murmur_hash(bigint_col) END)]
|
02:AGGREGATE [STREAMING]
| Class 0
| output: count(tinyint_col)
| group by: bigint_col
| Class 1
| output: count(smallint_col)
| group by: bigint_col
| Class 2
| output: min:merge(timestamp_col), max:merge(date_string_col)
| group by: bigint_col
| row-size=68B cardinality=30
|
06:AGGREGATE
| Class 0
| group by: bigint_col, tinyint_col
| Class 1
| group by: bigint_col, smallint_col
| Class 2
| output: min:merge(timestamp_col), max:merge(date_string_col)
| group by: bigint_col
| row-size=55B cardinality=210
|
05:EXCHANGE [HASH(CASE valid_tid(1,3,5) WHEN 1 THEN murmur_hash(bigint_col) WHEN 3 THEN murmur_hash(bigint_col) WHEN 5 THEN murmur_hash(bigint_col) END,CASE valid_tid(1,3,5) WHEN 1 THEN murmur_hash(tinyint_col) WHEN 3 THEN murmur_hash(smallint_col) WHEN 5 THEN 0 END)]
|
01:AGGREGATE [STREAMING]
| Class 0
| group by: bigint_col, tinyint_col
| Class 1
| group by: bigint_col, smallint_col
| Class 2
| output: min(timestamp_col), max(date_string_col)
| group by: bigint_col
| row-size=55B cardinality=210
|
00:SCAN HDFS [functional.alltypes]
HDFS partitions=24/24 files=24 size=478.45KB
row-size=47B cardinality=7.30K
====
# Mixed distinct and non-distinct aggs. No materialized aggregations. No group by.
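# Since no aggregate output is referenced, all aggregate functions are pruned and only
# an empty finalize aggregation remains to produce the single result row.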
select 1 from
(select min(string_col) a, count(distinct tinyint_col) b,
max(string_col) c, count(distinct smallint_col) d
from functional.alltypes) v
---- PLAN
PLAN-ROOT SINK
|
01:AGGREGATE [FINALIZE]
| row-size=0B cardinality=1
|
00:SCAN HDFS [functional.alltypes]
HDFS partitions=24/24 files=24 size=478.45KB
row-size=0B cardinality=7.30K
====
# Mixed distinct and non-distinct aggs. No materialized aggregations. With group by.
select v.gby1 from
(select min(string_col) a, count(distinct tinyint_col) b,
max(string_col) c, count(distinct smallint_col) d,
string_col gby1, date_string_col gby2
from functional.alltypes
group by gby1, gby2) v
---- PLAN
PLAN-ROOT SINK
|
01:AGGREGATE [FINALIZE]
| group by: string_col, date_string_col
| row-size=33B cardinality=7.30K
|
00:SCAN HDFS [functional.alltypes]
HDFS partitions=24/24 files=24 size=478.45KB
row-size=33B cardinality=7.30K
====
# Only distinct aggs. No materialized aggregations. No group by.
select 1 from
(select count(distinct tinyint_col) a, count(distinct smallint_col) b
from functional.alltypes) v
---- PLAN
PLAN-ROOT SINK
|
02:AGGREGATE [FINALIZE]
| row-size=0B cardinality=1
|
01:AGGREGATE
| group by: smallint_col
| row-size=2B cardinality=10
|
00:SCAN HDFS [functional.alltypes]
HDFS partitions=24/24 files=24 size=478.45KB
row-size=2B cardinality=7.30K
====
# Only distinct aggs. No materialized aggregations. With group by.
select v.gby2 from
(select count(distinct tinyint_col) a, count(distinct smallint_col) b,
string_col gby1, date_string_col gby2
from functional.alltypes
group by gby1, gby2) v
---- PLAN
PLAN-ROOT SINK
|
02:AGGREGATE [FINALIZE]
| group by: string_col, date_string_col
| row-size=33B cardinality=7.30K
|
01:AGGREGATE
| group by: string_col, date_string_col, smallint_col
| row-size=35B cardinality=7.30K
|
00:SCAN HDFS [functional.alltypes]
HDFS partitions=24/24 files=24 size=478.45KB
row-size=35B cardinality=7.30K
====
# Simplifies to a single aggregation class. Only first distinct agg is materialized.
# No group by.
select b from
(select min(string_col) a, count(distinct tinyint_col) b,
max(string_col) c, count(distinct smallint_col) d
from functional.alltypes
having count(distinct tinyint_col) < 9) v
---- PLAN
PLAN-ROOT SINK
|
02:AGGREGATE [FINALIZE]
| output: count(tinyint_col)
| having: count(tinyint_col) < 9
| row-size=8B cardinality=0
|
01:AGGREGATE
| group by: tinyint_col
| row-size=1B cardinality=10
|
00:SCAN HDFS [functional.alltypes]
HDFS partitions=24/24 files=24 size=478.45KB
row-size=1B cardinality=7.30K
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
07:AGGREGATE [FINALIZE]
| output: count:merge(tinyint_col)
| having: count(tinyint_col) < 9
| row-size=8B cardinality=1
|
06:EXCHANGE [UNPARTITIONED]
|
02:AGGREGATE
| output: count(tinyint_col)
| row-size=8B cardinality=0
|
05:AGGREGATE
| group by: tinyint_col
| row-size=1B cardinality=10
|
04:EXCHANGE [HASH(tinyint_col)]
|
01:AGGREGATE [STREAMING]
| group by: tinyint_col
| row-size=1B cardinality=10
|
00:SCAN HDFS [functional.alltypes]
HDFS partitions=24/24 files=24 size=478.45KB
row-size=1B cardinality=7.30K
====
# Simplifies to a single aggregation class. Only second distinct agg is materialized.
# No group by.
select d from
(select min(string_col) a, count(distinct tinyint_col) b,
max(string_col) c, count(distinct smallint_col) d
from functional.alltypes
having count(distinct smallint_col) < 9) v
---- PLAN
PLAN-ROOT SINK
|
02:AGGREGATE [FINALIZE]
| output: count(smallint_col)
| having: count(smallint_col) < 9
| row-size=8B cardinality=0
|
01:AGGREGATE
| group by: smallint_col
| row-size=2B cardinality=10
|
00:SCAN HDFS [functional.alltypes]
HDFS partitions=24/24 files=24 size=478.45KB
row-size=2B cardinality=7.30K
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
07:AGGREGATE [FINALIZE]
| output: count:merge(smallint_col)
| having: count(smallint_col) < 9
| row-size=8B cardinality=1
|
06:EXCHANGE [UNPARTITIONED]
|
02:AGGREGATE
| output: count(smallint_col)
| row-size=8B cardinality=0
|
05:AGGREGATE
| group by: smallint_col
| row-size=2B cardinality=10
|
04:EXCHANGE [HASH(smallint_col)]
|
01:AGGREGATE [STREAMING]
| group by: smallint_col
| row-size=2B cardinality=10
|
00:SCAN HDFS [functional.alltypes]
HDFS partitions=24/24 files=24 size=478.45KB
row-size=2B cardinality=7.30K
====
# Simplifies to a single aggregation class. Only non-distinct aggs remain.
# No group by.
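# With only non-distinct aggs left, no per-class or transpose aggregation is needed;
# the view's HAVING and the outer WHERE predicates all become having-conjuncts on the
# single aggregation node.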
select a, c from
(select min(string_col) a, count(distinct tinyint_col) b,
max(string_col) c, count(distinct smallint_col) d
from functional.alltypes
having min(string_col) < '9') v
where c > '0' and c < a
---- PLAN
PLAN-ROOT SINK
|
01:AGGREGATE [FINALIZE]
| output: min(string_col), max(string_col)
| having: max(string_col) > '0', min(string_col) < '9', max(string_col) < min(string_col)
| row-size=24B cardinality=0
|
00:SCAN HDFS [functional.alltypes]
HDFS partitions=24/24 files=24 size=478.45KB
row-size=13B cardinality=7.30K
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
04:AGGREGATE [FINALIZE]
| output: min:merge(string_col), max:merge(string_col)
| having: max(string_col) > '0', min(string_col) < '9', max(string_col) < min(string_col)
| row-size=24B cardinality=0
|
03:EXCHANGE [UNPARTITIONED]
|
01:AGGREGATE
| output: min(string_col), max(string_col)
| row-size=24B cardinality=1
|
00:SCAN HDFS [functional.alltypes]
HDFS partitions=24/24 files=24 size=478.45KB
row-size=13B cardinality=7.30K
====
# Simplifies to a single aggregation class with distinct and non-distinct aggs.
# No group by.
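# The surviving class mixes count(distinct tinyint_col) with min/max, so phase 1 groups
# by tinyint_col while pre-aggregating min/max, and phase 2 counts the groups and
# merges min/max.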
select a, b, c from
(select min(string_col) a, count(distinct tinyint_col) b,
max(string_col) c, count(distinct smallint_col) d
from functional.alltypes
having min(string_col) < '9' and count(distinct tinyint_col) = 10) v
where c > '0' and c < a
---- PLAN
PLAN-ROOT SINK
|
02:AGGREGATE [FINALIZE]
| output: count(tinyint_col), min:merge(string_col), max:merge(string_col)
| having: count(tinyint_col) = 10, max(string_col) > '0', min(string_col) < '9', max(string_col) < min(string_col)
| row-size=32B cardinality=0
|
01:AGGREGATE
| output: min(string_col), max(string_col)
| group by: tinyint_col
| row-size=25B cardinality=10
|
00:SCAN HDFS [functional.alltypes]
HDFS partitions=24/24 files=24 size=478.45KB
row-size=14B cardinality=7.30K
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
07:AGGREGATE [FINALIZE]
| output: count:merge(tinyint_col), min:merge(string_col), max:merge(string_col)
| having: count(tinyint_col) = 10, max(string_col) > '0', min(string_col) < '9', max(string_col) < min(string_col)
| row-size=32B cardinality=1
|
06:EXCHANGE [UNPARTITIONED]
|
02:AGGREGATE
| output: count(tinyint_col), min:merge(string_col), max:merge(string_col)
| row-size=32B cardinality=0
|
05:AGGREGATE
| output: min:merge(string_col), max:merge(string_col)
| group by: tinyint_col
| row-size=25B cardinality=10
|
04:EXCHANGE [HASH(tinyint_col)]
|
01:AGGREGATE [STREAMING]
| output: min(string_col), max(string_col)
| group by: tinyint_col
| row-size=25B cardinality=10
|
00:SCAN HDFS [functional.alltypes]
HDFS partitions=24/24 files=24 size=478.45KB
row-size=14B cardinality=7.30K
====
# Simplifies to a single aggregation class with one distinct agg.
# With group by.
select b, gby1 from
(select min(string_col) a, count(distinct tinyint_col) b,
max(string_col) c, count(distinct smallint_col) d,
date_string_col gby1, timestamp_col gby2
from functional.alltypes
group by gby1, gby2
having count(distinct tinyint_col) < 10) v
where gby1 = 'test1'
---- PLAN
PLAN-ROOT SINK
|
02:AGGREGATE [FINALIZE]
| output: count(tinyint_col)
| group by: date_string_col, timestamp_col
| having: count(tinyint_col) < 10
| row-size=44B cardinality=1
|
01:AGGREGATE
| group by: date_string_col, timestamp_col, tinyint_col
| row-size=37B cardinality=10
|
00:SCAN HDFS [functional.alltypes]
HDFS partitions=24/24 files=24 size=478.45KB
predicates: functional.alltypes.date_string_col = 'test1'
row-size=37B cardinality=10
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
08:EXCHANGE [UNPARTITIONED]
|
07:AGGREGATE [FINALIZE]
| output: count:merge(tinyint_col)
| group by: date_string_col, timestamp_col
| having: count(tinyint_col) < 10
| row-size=44B cardinality=10
|
06:EXCHANGE [HASH(date_string_col,timestamp_col)]
|
02:AGGREGATE [STREAMING]
| output: count(tinyint_col)
| group by: date_string_col, timestamp_col
| row-size=44B cardinality=1
|
05:AGGREGATE
| group by: date_string_col, timestamp_col, tinyint_col
| row-size=37B cardinality=10
|
04:EXCHANGE [HASH(date_string_col,timestamp_col,tinyint_col)]
|
01:AGGREGATE [STREAMING]
| group by: date_string_col, timestamp_col, tinyint_col
| row-size=37B cardinality=10
|
00:SCAN HDFS [functional.alltypes]
HDFS partitions=24/24 files=24 size=478.45KB
predicates: functional.alltypes.date_string_col = 'test1'
row-size=37B cardinality=10
====
# Simplifies to a single aggregation class with two non-distinct aggs.
# With group by.
select a, c, gby1, gby2 from
(select min(string_col) a, count(distinct tinyint_col) b,
max(string_col) c, count(distinct smallint_col) d,
date_string_col gby1, timestamp_col gby2
from functional.alltypes
group by gby1, gby2
having count(distinct tinyint_col) < 10) v
where gby1 = 'test1' and gby1 < gby2
---- PLAN
PLAN-ROOT SINK
|
02:AGGREGATE [FINALIZE]
| output: count(tinyint_col), min:merge(string_col), max:merge(string_col)
| group by: date_string_col, timestamp_col
| having: count(tinyint_col) < 10, date_string_col < timestamp_col
| row-size=68B cardinality=0
|
01:AGGREGATE
| output: min(string_col), max(string_col)
| group by: date_string_col, timestamp_col, tinyint_col
| row-size=61B cardinality=3
|
00:SCAN HDFS [functional.alltypes]
HDFS partitions=24/24 files=24 size=478.45KB
predicates: functional.alltypes.timestamp_col > NULL, functional.alltypes.date_string_col = 'test1'
row-size=50B cardinality=3
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
08:EXCHANGE [UNPARTITIONED]
|
07:AGGREGATE [FINALIZE]
| output: count:merge(tinyint_col), min:merge(string_col), max:merge(string_col)
| group by: date_string_col, timestamp_col
| having: count(tinyint_col) < 10, date_string_col < timestamp_col
| row-size=68B cardinality=3
|
06:EXCHANGE [HASH(date_string_col,timestamp_col)]
|
02:AGGREGATE [STREAMING]
| output: count(tinyint_col), min:merge(string_col), max:merge(string_col)
| group by: date_string_col, timestamp_col
| row-size=68B cardinality=0
|
05:AGGREGATE
| output: min:merge(string_col), max:merge(string_col)
| group by: date_string_col, timestamp_col, tinyint_col
| row-size=61B cardinality=3
|
04:EXCHANGE [HASH(date_string_col,timestamp_col,tinyint_col)]
|
01:AGGREGATE [STREAMING]
| output: min(string_col), max(string_col)
| group by: date_string_col, timestamp_col, tinyint_col
| row-size=61B cardinality=3
|
00:SCAN HDFS [functional.alltypes]
HDFS partitions=24/24 files=24 size=478.45KB
predicates: functional.alltypes.timestamp_col > NULL, functional.alltypes.date_string_col = 'test1'
row-size=50B cardinality=3
====
# Simplifies to a single aggregation class with one distinct and one non-distinct agg.
# With group by.
select c, d, gby1, gby2 from
(select min(string_col) a, count(distinct tinyint_col) b,
max(string_col) c, count(distinct smallint_col) d,
date_string_col gby1, timestamp_col gby2
from functional.alltypes
group by gby1, gby2
having count(distinct smallint_col) < 20) v
where gby1 = 'test1' and gby1 < gby2
---- PLAN
PLAN-ROOT SINK
|
02:AGGREGATE [FINALIZE]
| output: count(smallint_col), max:merge(string_col)
| group by: date_string_col, timestamp_col
| having: count(smallint_col) < 20, date_string_col < timestamp_col
| row-size=56B cardinality=0
|
01:AGGREGATE
| output: max(string_col)
| group by: date_string_col, timestamp_col, smallint_col
| row-size=50B cardinality=3
|
00:SCAN HDFS [functional.alltypes]
HDFS partitions=24/24 files=24 size=478.45KB
predicates: functional.alltypes.timestamp_col > NULL, functional.alltypes.date_string_col = 'test1'
row-size=51B cardinality=3
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
08:EXCHANGE [UNPARTITIONED]
|
07:AGGREGATE [FINALIZE]
| output: count:merge(smallint_col), max:merge(string_col)
| group by: date_string_col, timestamp_col
| having: count(smallint_col) < 20, date_string_col < timestamp_col
| row-size=56B cardinality=3
|
06:EXCHANGE [HASH(date_string_col,timestamp_col)]
|
02:AGGREGATE [STREAMING]
| output: count(smallint_col), max:merge(string_col)
| group by: date_string_col, timestamp_col
| row-size=56B cardinality=0
|
05:AGGREGATE
| output: max:merge(string_col)
| group by: date_string_col, timestamp_col, smallint_col
| row-size=50B cardinality=3
|
04:EXCHANGE [HASH(date_string_col,timestamp_col,smallint_col)]
|
01:AGGREGATE [STREAMING]
| output: max(string_col)
| group by: date_string_col, timestamp_col, smallint_col
| row-size=50B cardinality=3
|
00:SCAN HDFS [functional.alltypes]
HDFS partitions=24/24 files=24 size=478.45KB
predicates: functional.alltypes.timestamp_col > NULL, functional.alltypes.date_string_col = 'test1'
row-size=51B cardinality=3
====