testdata/workloads/functional-planner/queries/PlannerTest/shuffle-by-distinct-exprs.test - impala - Git at Google

 # Distinct agg without a grouping expr
 select count(distinct int_col) from functional.alltypes;
 ---- QUERYOPTIONS
 SHUFFLE_DISTINCT_EXPRS=false
 ---- DISTRIBUTEDPLAN
 PLAN-ROOT SINK
 |
 06:AGGREGATE [FINALIZE]
 |  output: count:merge(int_col)
 |  row-size=8B cardinality=1
 |
 05:EXCHANGE [UNPARTITIONED]
 |
 02:AGGREGATE
 |  output: count(int_col)
 |  row-size=8B cardinality=1
 |
 04:AGGREGATE
 |  group by: int_col
 |  row-size=4B cardinality=10
 |
 03:EXCHANGE [HASH(int_col)]
 |
 01:AGGREGATE [STREAMING]
 |  group by: int_col
 |  row-size=4B cardinality=10
 |
 00:SCAN HDFS [functional.alltypes]
    HDFS partitions=24/24 files=24 size=478.45KB
    row-size=4B cardinality=7.30K
 ====
 select count(distinct int_col) from functional.alltypes;
 ---- QUERYOPTIONS
 # Distinct exprs in a aggregation without grouping is always shuffled by. Setting it to
 # true doesn't affect the plan.
 SHUFFLE_DISTINCT_EXPRS=true
 ---- DISTRIBUTEDPLAN
 PLAN-ROOT SINK
 |
 06:AGGREGATE [FINALIZE]
 |  output: count:merge(int_col)
 |  row-size=8B cardinality=1
 |
 05:EXCHANGE [UNPARTITIONED]
 |
 02:AGGREGATE
 |  output: count(int_col)
 |  row-size=8B cardinality=1
 |
 04:AGGREGATE
 |  group by: int_col
 |  row-size=4B cardinality=10
 |
 03:EXCHANGE [HASH(int_col)]
 |
 01:AGGREGATE [STREAMING]
 |  group by: int_col
 |  row-size=4B cardinality=10
 |
 00:SCAN HDFS [functional.alltypes]
    HDFS partitions=24/24 files=24 size=478.45KB
    row-size=4B cardinality=7.30K
 ====
 # Distinct agg with a grouping expr
 select count(distinct int_col) from functional.alltypes group by year;
 ---- QUERYOPTIONS
 SHUFFLE_DISTINCT_EXPRS=false
 ---- DISTRIBUTEDPLAN
 PLAN-ROOT SINK
 |
 05:EXCHANGE [UNPARTITIONED]
 |
 02:AGGREGATE [FINALIZE]
 |  output: count(int_col)
 |  group by: `year`
 |  row-size=12B cardinality=2
 |
 04:AGGREGATE
 |  group by: `year`, int_col
 |  row-size=8B cardinality=20
 |
 03:EXCHANGE [HASH(`year`)]
 |
 01:AGGREGATE [STREAMING]
 |  group by: `year`, int_col
 |  row-size=8B cardinality=20
 |
 00:SCAN HDFS [functional.alltypes]
    HDFS partitions=24/24 files=24 size=478.45KB
    row-size=8B cardinality=7.30K
 ====
 select count(distinct int_col) from functional.alltypes group by year;
 ---- QUERYOPTIONS
 # Shuffling by distinct exprs will create 1 more exchange node and 1 more agg node.
 SHUFFLE_DISTINCT_EXPRS=true
 ---- DISTRIBUTEDPLAN
 PLAN-ROOT SINK
 |
 07:EXCHANGE [UNPARTITIONED]
 |
 06:AGGREGATE [FINALIZE]
 |  output: count:merge(int_col)
 |  group by: `year`
 |  row-size=12B cardinality=2
 |
 05:EXCHANGE [HASH(`year`)]
 |
 02:AGGREGATE [STREAMING]
 |  output: count(int_col)
 |  group by: `year`
 |  row-size=12B cardinality=2
 |
 04:AGGREGATE
 |  group by: `year`, int_col
 |  row-size=8B cardinality=20
 |
 03:EXCHANGE [HASH(`year`,int_col)]
 |
 01:AGGREGATE [STREAMING]
 |  group by: `year`, int_col
 |  row-size=8B cardinality=20
 |
 00:SCAN HDFS [functional.alltypes]
    HDFS partitions=24/24 files=24 size=478.45KB
    row-size=8B cardinality=7.30K
 ====
 # Distinct agg without a grouping expr and with a compatible child partition
 select count(distinct a.int_col) from functional.alltypes a inner join [shuffle]
     functional.alltypes b on a.int_col = b.int_col;
 ---- QUERYOPTIONS
 SHUFFLE_DISTINCT_EXPRS=false
 ---- DISTRIBUTEDPLAN
 PLAN-ROOT SINK
 |
 08:AGGREGATE [FINALIZE]
 |  output: count:merge(a.int_col)
 |  row-size=8B cardinality=1
 |
 07:EXCHANGE [UNPARTITIONED]
 |
 04:AGGREGATE
 |  output: count(a.int_col)
 |  row-size=8B cardinality=1
 |
 03:AGGREGATE
 |  group by: a.int_col
 |  row-size=4B cardinality=10
 |
 02:HASH JOIN [INNER JOIN, PARTITIONED]
 |  hash predicates: a.int_col = b.int_col
 |  runtime filters: RF000 <- b.int_col
 |  row-size=8B cardinality=5.33M
 |
 |--06:EXCHANGE [HASH(b.int_col)]
 |  |
 |  01:SCAN HDFS [functional.alltypes b]
 |     HDFS partitions=24/24 files=24 size=478.45KB
 |     row-size=4B cardinality=7.30K
 |
 05:EXCHANGE [HASH(a.int_col)]
 |
 00:SCAN HDFS [functional.alltypes a]
    HDFS partitions=24/24 files=24 size=478.45KB
    runtime filters: RF000 -> a.int_col
    row-size=4B cardinality=7.30K
 ====
 select count(distinct a.int_col) from functional.alltypes a inner join [shuffle]
     functional.alltypes b on a.int_col = b.int_col;
 ---- QUERYOPTIONS
 # Distinct exprs in a aggregation without grouping is always shuffled by. Setting it to
 # true doesn't affect the plan.
 SHUFFLE_DISTINCT_EXPRS=true
 ---- DISTRIBUTEDPLAN
 PLAN-ROOT SINK
 |
 08:AGGREGATE [FINALIZE]
 |  output: count:merge(a.int_col)
 |  row-size=8B cardinality=1
 |
 07:EXCHANGE [UNPARTITIONED]
 |
 04:AGGREGATE
 |  output: count(a.int_col)
 |  row-size=8B cardinality=1
 |
 03:AGGREGATE
 |  group by: a.int_col
 |  row-size=4B cardinality=10
 |
 02:HASH JOIN [INNER JOIN, PARTITIONED]
 |  hash predicates: a.int_col = b.int_col
 |  runtime filters: RF000 <- b.int_col
 |  row-size=8B cardinality=5.33M
 |
 |--06:EXCHANGE [HASH(b.int_col)]
 |  |
 |  01:SCAN HDFS [functional.alltypes b]
 |     HDFS partitions=24/24 files=24 size=478.45KB
 |     row-size=4B cardinality=7.30K
 |
 05:EXCHANGE [HASH(a.int_col)]
 |
 00:SCAN HDFS [functional.alltypes a]
    HDFS partitions=24/24 files=24 size=478.45KB
    runtime filters: RF000 -> a.int_col
    row-size=4B cardinality=7.30K
 ====
 # Distinct agg with a grouping expr and a compatible child partition
 select count(distinct a.int_col) from functional.alltypes a inner join [shuffle]
     functional.alltypes b on a.year = b.year group by a.year;
 ---- QUERYOPTIONS
 SHUFFLE_DISTINCT_EXPRS=false
 ---- DISTRIBUTEDPLAN
 PLAN-ROOT SINK
 |
 07:EXCHANGE [UNPARTITIONED]
 |
 04:AGGREGATE [FINALIZE]
 |  output: count(a.int_col)
 |  group by: a.`year`
 |  row-size=12B cardinality=2
 |
 03:AGGREGATE
 |  group by: a.`year`, a.int_col
 |  row-size=8B cardinality=20
 |
 02:HASH JOIN [INNER JOIN, PARTITIONED]
 |  hash predicates: a.`year` = b.`year`
 |  runtime filters: RF000 <- b.`year`
 |  row-size=12B cardinality=87.60K
 |
 |--06:EXCHANGE [HASH(b.`year`)]
 |  |
 |  01:SCAN HDFS [functional.alltypes b]
 |     HDFS partitions=24/24 files=24 size=478.45KB
 |     partition key scan
 |     row-size=4B cardinality=24
 |
 05:EXCHANGE [HASH(a.`year`)]
 |
 00:SCAN HDFS [functional.alltypes a]
    HDFS partitions=24/24 files=24 size=478.45KB
    runtime filters: RF000 -> a.`year`
    row-size=8B cardinality=7.30K
 ====
 select count(distinct a.int_col) from functional.alltypes a inner join [shuffle]
     functional.alltypes b on a.year = b.year group by a.year;
 ---- QUERYOPTIONS
 # Shuffling by distinct exprs will create 2 more exchange nodes and 2 more agg nodes.
 SHUFFLE_DISTINCT_EXPRS=true
 ---- DISTRIBUTEDPLAN
 PLAN-ROOT SINK
 |
 11:EXCHANGE [UNPARTITIONED]
 |
 10:AGGREGATE [FINALIZE]
 |  output: count:merge(a.int_col)
 |  group by: a.`year`
 |  row-size=12B cardinality=2
 |
 09:EXCHANGE [HASH(a.`year`)]
 |
 04:AGGREGATE [STREAMING]
 |  output: count(a.int_col)
 |  group by: a.`year`
 |  row-size=12B cardinality=2
 |
 08:AGGREGATE
 |  group by: a.`year`, a.int_col
 |  row-size=8B cardinality=20
 |
 07:EXCHANGE [HASH(a.`year`,a.int_col)]
 |
 03:AGGREGATE [STREAMING]
 |  group by: a.`year`, a.int_col
 |  row-size=8B cardinality=20
 |
 02:HASH JOIN [INNER JOIN, PARTITIONED]
 |  hash predicates: a.`year` = b.`year`
 |  runtime filters: RF000 <- b.`year`
 |  row-size=12B cardinality=87.60K
 |
 |--06:EXCHANGE [HASH(b.`year`)]
 |  |
 |  01:SCAN HDFS [functional.alltypes b]
 |     HDFS partitions=24/24 files=24 size=478.45KB
 |     partition key scan
 |     row-size=4B cardinality=24
 |
 05:EXCHANGE [HASH(a.`year`)]
 |
 00:SCAN HDFS [functional.alltypes a]
    HDFS partitions=24/24 files=24 size=478.45KB
    runtime filters: RF000 -> a.`year`
    row-size=8B cardinality=7.30K
 ====
 # The input is partitioned by distinct exprs + grouping exprs
 select count(distinct a.int_col) from functional.alltypes a inner join [shuffle]
    functional.alltypes b on a.year = b.year and a.int_col = b.int_col group by a.year;
 ---- QUERYOPTIONS
 # The input partition is compatible with grouping exprs + distinct exprs. Phase-1 merge
 # aggregation is skipped.
 SHUFFLE_DISTINCT_EXPRS=true
 ---- DISTRIBUTEDPLAN
 PLAN-ROOT SINK
 |
 09:EXCHANGE [UNPARTITIONED]
 |
 08:AGGREGATE [FINALIZE]
 |  output: count:merge(a.int_col)
 |  group by: a.`year`
 |  row-size=12B cardinality=2
 |
 07:EXCHANGE [HASH(a.`year`)]
 |
 04:AGGREGATE [STREAMING]
 |  output: count(a.int_col)
 |  group by: a.`year`
 |  row-size=12B cardinality=2
 |
 03:AGGREGATE
 |  group by: a.`year`, a.int_col
 |  row-size=8B cardinality=20
 |
 02:HASH JOIN [INNER JOIN, PARTITIONED]
 |  hash predicates: a.`year` = b.`year`, a.int_col = b.int_col
 |  runtime filters: RF000 <- b.`year`, RF001 <- b.int_col
 |  row-size=16B cardinality=5.33M
 |
 |--06:EXCHANGE [HASH(b.`year`,b.int_col)]
 |  |
 |  01:SCAN HDFS [functional.alltypes b]
 |     HDFS partitions=24/24 files=24 size=478.45KB
 |     row-size=8B cardinality=7.30K
 |
 05:EXCHANGE [HASH(a.`year`,a.int_col)]
 |
 00:SCAN HDFS [functional.alltypes a]
    HDFS partitions=24/24 files=24 size=478.45KB
    runtime filters: RF000 -> a.`year`, RF001 -> a.int_col
    row-size=8B cardinality=7.30K
 ====
 select count(distinct a.int_col) from functional.alltypes a inner join [shuffle]
    functional.alltypes b on a.year = b.year and a.int_col = b.int_col group by a.year;
 ---- QUERYOPTIONS
 # The input partition is not compatible with grouping exprs. Phase-1 merge aggregation is
 # executed.
 SHUFFLE_DISTINCT_EXPRS=false
 ---- DISTRIBUTEDPLAN
 PLAN-ROOT SINK
 |
 09:EXCHANGE [UNPARTITIONED]
 |
 04:AGGREGATE [FINALIZE]
 |  output: count(a.int_col)
 |  group by: a.`year`
 |  row-size=12B cardinality=2
 |
 08:AGGREGATE
 |  group by: a.`year`, a.int_col
 |  row-size=8B cardinality=20
 |
 07:EXCHANGE [HASH(a.`year`)]
 |
 03:AGGREGATE [STREAMING]
 |  group by: a.`year`, a.int_col
 |  row-size=8B cardinality=20
 |
 02:HASH JOIN [INNER JOIN, PARTITIONED]
 |  hash predicates: a.`year` = b.`year`, a.int_col = b.int_col
 |  runtime filters: RF000 <- b.`year`, RF001 <- b.int_col
 |  row-size=16B cardinality=5.33M
 |
 |--06:EXCHANGE [HASH(b.`year`,b.int_col)]
 |  |
 |  01:SCAN HDFS [functional.alltypes b]
 |     HDFS partitions=24/24 files=24 size=478.45KB
 |     row-size=8B cardinality=7.30K
 |
 05:EXCHANGE [HASH(a.`year`,a.int_col)]
 |
 00:SCAN HDFS [functional.alltypes a]
    HDFS partitions=24/24 files=24 size=478.45KB
    runtime filters: RF000 -> a.`year`, RF001 -> a.int_col
    row-size=8B cardinality=7.30K
 ====
	# Distinct agg without a grouping expr
	select count(distinct int_col) from functional.alltypes;
	---- QUERYOPTIONS
	SHUFFLE_DISTINCT_EXPRS=false
	---- DISTRIBUTEDPLAN
	PLAN-ROOT SINK
	\|
	06:AGGREGATE [FINALIZE]
	\| output: count:merge(int_col)
	\| row-size=8B cardinality=1
	\|
	05:EXCHANGE [UNPARTITIONED]
	\|
	02:AGGREGATE
	\| output: count(int_col)
	\| row-size=8B cardinality=1
	\|
	04:AGGREGATE
	\| group by: int_col
	\| row-size=4B cardinality=10
	\|
	03:EXCHANGE [HASH(int_col)]
	\|
	01:AGGREGATE [STREAMING]
	\| group by: int_col
	\| row-size=4B cardinality=10
	\|
	00:SCAN HDFS [functional.alltypes]
	HDFS partitions=24/24 files=24 size=478.45KB
	row-size=4B cardinality=7.30K
	====
	select count(distinct int_col) from functional.alltypes;
	---- QUERYOPTIONS
	# Distinct exprs in a aggregation without grouping is always shuffled by. Setting it to
	# true doesn't affect the plan.
	SHUFFLE_DISTINCT_EXPRS=true
	---- DISTRIBUTEDPLAN
	PLAN-ROOT SINK
	\|
	06:AGGREGATE [FINALIZE]
	\| output: count:merge(int_col)
	\| row-size=8B cardinality=1
	\|
	05:EXCHANGE [UNPARTITIONED]
	\|
	02:AGGREGATE
	\| output: count(int_col)
	\| row-size=8B cardinality=1
	\|
	04:AGGREGATE
	\| group by: int_col
	\| row-size=4B cardinality=10
	\|
	03:EXCHANGE [HASH(int_col)]
	\|
	01:AGGREGATE [STREAMING]
	\| group by: int_col
	\| row-size=4B cardinality=10
	\|
	00:SCAN HDFS [functional.alltypes]
	HDFS partitions=24/24 files=24 size=478.45KB
	row-size=4B cardinality=7.30K
	====
	# Distinct agg with a grouping expr
	select count(distinct int_col) from functional.alltypes group by year;
	---- QUERYOPTIONS
	SHUFFLE_DISTINCT_EXPRS=false
	---- DISTRIBUTEDPLAN
	PLAN-ROOT SINK
	\|
	05:EXCHANGE [UNPARTITIONED]
	\|
	02:AGGREGATE [FINALIZE]
	\| output: count(int_col)
	\| group by: `year`
	\| row-size=12B cardinality=2
	\|
	04:AGGREGATE
	\| group by: `year`, int_col
	\| row-size=8B cardinality=20
	\|
	03:EXCHANGE [HASH(`year`)]
	\|
	01:AGGREGATE [STREAMING]
	\| group by: `year`, int_col
	\| row-size=8B cardinality=20
	\|
	00:SCAN HDFS [functional.alltypes]
	HDFS partitions=24/24 files=24 size=478.45KB
	row-size=8B cardinality=7.30K
	====
	select count(distinct int_col) from functional.alltypes group by year;
	---- QUERYOPTIONS
	# Shuffling by distinct exprs will create 1 more exchange node and 1 more agg node.
	SHUFFLE_DISTINCT_EXPRS=true
	---- DISTRIBUTEDPLAN
	PLAN-ROOT SINK
	\|
	07:EXCHANGE [UNPARTITIONED]
	\|
	06:AGGREGATE [FINALIZE]
	\| output: count:merge(int_col)
	\| group by: `year`
	\| row-size=12B cardinality=2
	\|
	05:EXCHANGE [HASH(`year`)]
	\|
	02:AGGREGATE [STREAMING]
	\| output: count(int_col)
	\| group by: `year`
	\| row-size=12B cardinality=2
	\|
	04:AGGREGATE
	\| group by: `year`, int_col
	\| row-size=8B cardinality=20
	\|
	03:EXCHANGE [HASH(`year`,int_col)]
	\|
	01:AGGREGATE [STREAMING]
	\| group by: `year`, int_col
	\| row-size=8B cardinality=20
	\|
	00:SCAN HDFS [functional.alltypes]
	HDFS partitions=24/24 files=24 size=478.45KB
	row-size=8B cardinality=7.30K
	====
	# Distinct agg without a grouping expr and with a compatible child partition
	select count(distinct a.int_col) from functional.alltypes a inner join [shuffle]
	functional.alltypes b on a.int_col = b.int_col;
	---- QUERYOPTIONS
	SHUFFLE_DISTINCT_EXPRS=false
	---- DISTRIBUTEDPLAN
	PLAN-ROOT SINK
	\|
	08:AGGREGATE [FINALIZE]
	\| output: count:merge(a.int_col)
	\| row-size=8B cardinality=1
	\|
	07:EXCHANGE [UNPARTITIONED]
	\|
	04:AGGREGATE
	\| output: count(a.int_col)
	\| row-size=8B cardinality=1
	\|
	03:AGGREGATE
	\| group by: a.int_col
	\| row-size=4B cardinality=10
	\|
	02:HASH JOIN [INNER JOIN, PARTITIONED]
	\| hash predicates: a.int_col = b.int_col
	\| runtime filters: RF000 <- b.int_col
	\| row-size=8B cardinality=5.33M
	\|
	\|--06:EXCHANGE [HASH(b.int_col)]
	\| \|
	\| 01:SCAN HDFS [functional.alltypes b]
	\| HDFS partitions=24/24 files=24 size=478.45KB
	\| row-size=4B cardinality=7.30K
	\|
	05:EXCHANGE [HASH(a.int_col)]
	\|
	00:SCAN HDFS [functional.alltypes a]
	HDFS partitions=24/24 files=24 size=478.45KB
	runtime filters: RF000 -> a.int_col
	row-size=4B cardinality=7.30K
	====
	select count(distinct a.int_col) from functional.alltypes a inner join [shuffle]
	functional.alltypes b on a.int_col = b.int_col;
	---- QUERYOPTIONS
	# Distinct exprs in a aggregation without grouping is always shuffled by. Setting it to
	# true doesn't affect the plan.
	SHUFFLE_DISTINCT_EXPRS=true
	---- DISTRIBUTEDPLAN
	PLAN-ROOT SINK
	\|
	08:AGGREGATE [FINALIZE]
	\| output: count:merge(a.int_col)
	\| row-size=8B cardinality=1
	\|
	07:EXCHANGE [UNPARTITIONED]
	\|
	04:AGGREGATE
	\| output: count(a.int_col)
	\| row-size=8B cardinality=1
	\|
	03:AGGREGATE
	\| group by: a.int_col
	\| row-size=4B cardinality=10
	\|
	02:HASH JOIN [INNER JOIN, PARTITIONED]
	\| hash predicates: a.int_col = b.int_col
	\| runtime filters: RF000 <- b.int_col
	\| row-size=8B cardinality=5.33M
	\|
	\|--06:EXCHANGE [HASH(b.int_col)]
	\| \|
	\| 01:SCAN HDFS [functional.alltypes b]
	\| HDFS partitions=24/24 files=24 size=478.45KB
	\| row-size=4B cardinality=7.30K
	\|
	05:EXCHANGE [HASH(a.int_col)]
	\|
	00:SCAN HDFS [functional.alltypes a]
	HDFS partitions=24/24 files=24 size=478.45KB
	runtime filters: RF000 -> a.int_col
	row-size=4B cardinality=7.30K
	====
	# Distinct agg with a grouping expr and a compatible child partition
	select count(distinct a.int_col) from functional.alltypes a inner join [shuffle]
	functional.alltypes b on a.year = b.year group by a.year;
	---- QUERYOPTIONS
	SHUFFLE_DISTINCT_EXPRS=false
	---- DISTRIBUTEDPLAN
	PLAN-ROOT SINK
	\|
	07:EXCHANGE [UNPARTITIONED]
	\|
	04:AGGREGATE [FINALIZE]
	\| output: count(a.int_col)
	\| group by: a.`year`
	\| row-size=12B cardinality=2
	\|
	03:AGGREGATE
	\| group by: a.`year`, a.int_col
	\| row-size=8B cardinality=20
	\|
	02:HASH JOIN [INNER JOIN, PARTITIONED]
	\| hash predicates: a.`year` = b.`year`
	\| runtime filters: RF000 <- b.`year`
	\| row-size=12B cardinality=87.60K
	\|
	\|--06:EXCHANGE [HASH(b.`year`)]
	\| \|
	\| 01:SCAN HDFS [functional.alltypes b]
	\| HDFS partitions=24/24 files=24 size=478.45KB
	\| partition key scan
	\| row-size=4B cardinality=24
	\|
	05:EXCHANGE [HASH(a.`year`)]
	\|
	00:SCAN HDFS [functional.alltypes a]
	HDFS partitions=24/24 files=24 size=478.45KB
	runtime filters: RF000 -> a.`year`
	row-size=8B cardinality=7.30K
	====
	select count(distinct a.int_col) from functional.alltypes a inner join [shuffle]
	functional.alltypes b on a.year = b.year group by a.year;
	---- QUERYOPTIONS
	# Shuffling by distinct exprs will create 2 more exchange nodes and 2 more agg nodes.
	SHUFFLE_DISTINCT_EXPRS=true
	---- DISTRIBUTEDPLAN
	PLAN-ROOT SINK
	\|
	11:EXCHANGE [UNPARTITIONED]
	\|
	10:AGGREGATE [FINALIZE]
	\| output: count:merge(a.int_col)
	\| group by: a.`year`
	\| row-size=12B cardinality=2
	\|
	09:EXCHANGE [HASH(a.`year`)]
	\|
	04:AGGREGATE [STREAMING]
	\| output: count(a.int_col)
	\| group by: a.`year`
	\| row-size=12B cardinality=2
	\|
	08:AGGREGATE
	\| group by: a.`year`, a.int_col
	\| row-size=8B cardinality=20
	\|
	07:EXCHANGE [HASH(a.`year`,a.int_col)]
	\|
	03:AGGREGATE [STREAMING]
	\| group by: a.`year`, a.int_col
	\| row-size=8B cardinality=20
	\|
	02:HASH JOIN [INNER JOIN, PARTITIONED]
	\| hash predicates: a.`year` = b.`year`
	\| runtime filters: RF000 <- b.`year`
	\| row-size=12B cardinality=87.60K
	\|
	\|--06:EXCHANGE [HASH(b.`year`)]
	\| \|
	\| 01:SCAN HDFS [functional.alltypes b]
	\| HDFS partitions=24/24 files=24 size=478.45KB
	\| partition key scan
	\| row-size=4B cardinality=24
	\|
	05:EXCHANGE [HASH(a.`year`)]
	\|
	00:SCAN HDFS [functional.alltypes a]
	HDFS partitions=24/24 files=24 size=478.45KB
	runtime filters: RF000 -> a.`year`
	row-size=8B cardinality=7.30K
	====
	# The input is partitioned by distinct exprs + grouping exprs
	select count(distinct a.int_col) from functional.alltypes a inner join [shuffle]
	functional.alltypes b on a.year = b.year and a.int_col = b.int_col group by a.year;
	---- QUERYOPTIONS
	# The input partition is compatible with grouping exprs + distinct exprs. Phase-1 merge
	# aggregation is skipped.
	SHUFFLE_DISTINCT_EXPRS=true
	---- DISTRIBUTEDPLAN
	PLAN-ROOT SINK
	\|
	09:EXCHANGE [UNPARTITIONED]
	\|
	08:AGGREGATE [FINALIZE]
	\| output: count:merge(a.int_col)
	\| group by: a.`year`
	\| row-size=12B cardinality=2
	\|
	07:EXCHANGE [HASH(a.`year`)]
	\|
	04:AGGREGATE [STREAMING]
	\| output: count(a.int_col)
	\| group by: a.`year`
	\| row-size=12B cardinality=2
	\|
	03:AGGREGATE
	\| group by: a.`year`, a.int_col
	\| row-size=8B cardinality=20
	\|
	02:HASH JOIN [INNER JOIN, PARTITIONED]
	\| hash predicates: a.`year` = b.`year`, a.int_col = b.int_col
	\| runtime filters: RF000 <- b.`year`, RF001 <- b.int_col
	\| row-size=16B cardinality=5.33M
	\|
	\|--06:EXCHANGE [HASH(b.`year`,b.int_col)]
	\| \|
	\| 01:SCAN HDFS [functional.alltypes b]
	\| HDFS partitions=24/24 files=24 size=478.45KB
	\| row-size=8B cardinality=7.30K
	\|
	05:EXCHANGE [HASH(a.`year`,a.int_col)]
	\|
	00:SCAN HDFS [functional.alltypes a]
	HDFS partitions=24/24 files=24 size=478.45KB
	runtime filters: RF000 -> a.`year`, RF001 -> a.int_col
	row-size=8B cardinality=7.30K
	====
	select count(distinct a.int_col) from functional.alltypes a inner join [shuffle]
	functional.alltypes b on a.year = b.year and a.int_col = b.int_col group by a.year;
	---- QUERYOPTIONS
	# The input partition is not compatible with grouping exprs. Phase-1 merge aggregation is
	# executed.
	SHUFFLE_DISTINCT_EXPRS=false
	---- DISTRIBUTEDPLAN
	PLAN-ROOT SINK
	\|
	09:EXCHANGE [UNPARTITIONED]
	\|
	04:AGGREGATE [FINALIZE]
	\| output: count(a.int_col)
	\| group by: a.`year`
	\| row-size=12B cardinality=2
	\|
	08:AGGREGATE
	\| group by: a.`year`, a.int_col
	\| row-size=8B cardinality=20
	\|
	07:EXCHANGE [HASH(a.`year`)]
	\|
	03:AGGREGATE [STREAMING]
	\| group by: a.`year`, a.int_col
	\| row-size=8B cardinality=20
	\|
	02:HASH JOIN [INNER JOIN, PARTITIONED]
	\| hash predicates: a.`year` = b.`year`, a.int_col = b.int_col
	\| runtime filters: RF000 <- b.`year`, RF001 <- b.int_col
	\| row-size=16B cardinality=5.33M
	\|
	\|--06:EXCHANGE [HASH(b.`year`,b.int_col)]
	\| \|
	\| 01:SCAN HDFS [functional.alltypes b]
	\| HDFS partitions=24/24 files=24 size=478.45KB
	\| row-size=8B cardinality=7.30K
	\|
	05:EXCHANGE [HASH(a.`year`,a.int_col)]
	\|
	00:SCAN HDFS [functional.alltypes a]
	HDFS partitions=24/24 files=24 size=478.45KB
	runtime filters: RF000 -> a.`year`, RF001 -> a.int_col
	row-size=8B cardinality=7.30K
	====