testdata/workloads/functional-planner/queries/PlannerTest/min-max-runtime-filters.test - impala - Git at Google

 # Basic min-max filter: two Kudu tables joined on an arithmetic expression over
 # table 'b' (b.tinyint_col + 1) and on a string column. The expected plan assigns one
 # min_max filter per join predicate, each targeting a bare column of the scan of 'a'.
 select count(*) from functional_kudu.alltypes a, functional_kudu.alltypestiny b
 where a.int_col = b.tinyint_col + 1 and a.string_col = b.string_col
 ---- PLAN
 F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
 |  Per-Host Resources: mem-estimate=13.44MB mem-reservation=1.94MB thread-reservation=3
 PLAN-ROOT SINK
 |  output exprs: count(*)
 |  mem-estimate=0B mem-reservation=0B thread-reservation=0
 |
 03:AGGREGATE [FINALIZE]
 |  output: count(*)
 |  mem-estimate=10.00MB mem-reservation=0B spill-buffer=2.00MB thread-reservation=0
 |  tuple-ids=2 row-size=8B cardinality=1
 |  in pipelines: 03(GETNEXT), 00(OPEN)
 |
 02:HASH JOIN [INNER JOIN]
 |  hash predicates: a.string_col = b.string_col, a.int_col = b.tinyint_col + 1
 |  fk/pk conjuncts: none
 |  runtime filters: RF002[min_max] <- b.string_col, RF003[min_max] <- b.tinyint_col + 1
 |  mem-estimate=1.94MB mem-reservation=1.94MB spill-buffer=64.00KB thread-reservation=0
 |  tuple-ids=0,1 row-size=39B cardinality=5.84K
 |  in pipelines: 00(GETNEXT), 01(OPEN)
 |
 |--01:SCAN KUDU [functional_kudu.alltypestiny b]
 |     mem-estimate=1.50MB mem-reservation=0B thread-reservation=1
 |     tuple-ids=1 row-size=18B cardinality=8
 |     in pipelines: 01(GETNEXT)
 |
 00:SCAN KUDU [functional_kudu.alltypes a]
    runtime filters: RF002[min_max] -> a.string_col, RF003[min_max] -> a.int_col
    mem-estimate=1.50MB mem-reservation=0B thread-reservation=1
    tuple-ids=0 row-size=21B cardinality=7.30K
    in pipelines: 00(GETNEXT)
 ====
 # Filters are not created if the target isn't a bare Kudu column or if 'is (not) distinct'
 # is used. Here the target a.int_col + 1 is an expression and the other two predicates
 # use IS [NOT] DISTINCT FROM, so the expected plan contains no runtime filters at all.
 select count(*) from functional_kudu.alltypes a, functional_kudu.alltypestiny b
 where a.int_col + 1 = b.int_col
     and a.string_col is distinct from b.string_col
     and a.tinyint_col is not distinct from b.tinyint_col
 ---- PLAN
 F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
 |  Per-Host Resources: mem-estimate=14.19MB mem-reservation=1.94MB thread-reservation=3
 PLAN-ROOT SINK
 |  output exprs: count(*)
 |  mem-estimate=0B mem-reservation=0B thread-reservation=0
 |
 03:AGGREGATE [FINALIZE]
 |  output: count(*)
 |  mem-estimate=10.00MB mem-reservation=0B spill-buffer=2.00MB thread-reservation=0
 |  tuple-ids=2 row-size=8B cardinality=1
 |  in pipelines: 03(GETNEXT), 00(OPEN)
 |
 02:HASH JOIN [INNER JOIN]
 |  hash predicates: a.tinyint_col IS NOT DISTINCT FROM b.tinyint_col, a.int_col + 1 = b.int_col
 |  fk/pk conjuncts: assumed fk/pk
 |  other predicates: a.string_col IS DISTINCT FROM b.string_col
 |  mem-estimate=1.94MB mem-reservation=1.94MB spill-buffer=64.00KB thread-reservation=0
 |  tuple-ids=0,1 row-size=44B cardinality=7.30K
 |  in pipelines: 00(GETNEXT), 01(OPEN)
 |
 |--01:SCAN KUDU [functional_kudu.alltypestiny b]
 |     mem-estimate=2.25MB mem-reservation=0B thread-reservation=1
 |     tuple-ids=1 row-size=22B cardinality=8
 |     in pipelines: 01(GETNEXT)
 |
 00:SCAN KUDU [functional_kudu.alltypes a]
    mem-estimate=2.25MB mem-reservation=0B thread-reservation=1
    tuple-ids=0 row-size=22B cardinality=7.30K
    in pipelines: 00(GETNEXT)
 ====
 # When the target expr is wrapped in a cast, a filter is only assigned if it is an
 # implicit integer cast. Of the four predicates below, only
 # a.tinyint_col = b.bigint_col gets a min_max filter in the expected plan; the two
 # explicit casts and the string/timestamp comparison get none.
 select count(*) from functional_kudu.alltypes a, functional_kudu.alltypestiny b
 where a.tinyint_col = b.bigint_col
     and cast(a.int_col as smallint) = b.smallint_col
     and a.string_col = b.timestamp_col
     and cast(a.float_col as double) = b.double_col
 ---- PLAN
 F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
 |  Per-Host Resources: mem-estimate=14.94MB mem-reservation=1.94MB thread-reservation=3
 PLAN-ROOT SINK
 |  output exprs: count(*)
 |  mem-estimate=0B mem-reservation=0B thread-reservation=0
 |
 03:AGGREGATE [FINALIZE]
 |  output: count(*)
 |  mem-estimate=10.00MB mem-reservation=0B spill-buffer=2.00MB thread-reservation=0
 |  tuple-ids=2 row-size=8B cardinality=1
 |  in pipelines: 03(GETNEXT), 00(OPEN)
 |
 02:HASH JOIN [INNER JOIN]
 |  hash predicates: CAST(a.float_col AS DOUBLE) = b.double_col, CAST(a.int_col AS SMALLINT) = b.smallint_col, a.string_col = b.timestamp_col, a.tinyint_col = b.bigint_col
 |  fk/pk conjuncts: a.string_col = b.timestamp_col, a.tinyint_col = b.bigint_col
 |  runtime filters: RF007[min_max] <- b.bigint_col
 |  mem-estimate=1.94MB mem-reservation=1.94MB spill-buffer=64.00KB thread-reservation=0
 |  tuple-ids=0,1 row-size=60B cardinality=1.46K
 |  in pipelines: 00(GETNEXT), 01(OPEN)
 |
 |--01:SCAN KUDU [functional_kudu.alltypestiny b]
 |     mem-estimate=3.00MB mem-reservation=0B thread-reservation=1
 |     tuple-ids=1 row-size=34B cardinality=8
 |     in pipelines: 01(GETNEXT)
 |
 00:SCAN KUDU [functional_kudu.alltypes a]
    runtime filters: RF007[min_max] -> a.tinyint_col
    mem-estimate=3.00MB mem-reservation=0B thread-reservation=1
    tuple-ids=0 row-size=26B cardinality=7.30K
    in pipelines: 00(GETNEXT)
 ====
 # Query with both Kudu and HDFS filter targets. In the expected plan the Kudu scan
 # of 'a' receives two min_max filters (one from each join), while the Parquet scan
 # of 'b' receives a bloom filter produced by the join with 'c'.
 select count(*) from functional_kudu.alltypes a, functional_parquet.alltypes b,
     functional_kudu.alltypes c
 where a.int_col = b.int_col and a.int_col = c.int_col
 ---- PLAN
 F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
 |  Per-Host Resources: mem-estimate=2.02GB mem-reservation=36.95MB thread-reservation=4 runtime-filters-memory=1.00MB
 PLAN-ROOT SINK
 |  output exprs: count(*)
 |  mem-estimate=0B mem-reservation=0B thread-reservation=0
 |
 05:AGGREGATE [FINALIZE]
 |  output: count(*)
 |  mem-estimate=10.00MB mem-reservation=0B spill-buffer=2.00MB thread-reservation=0
 |  tuple-ids=3 row-size=8B cardinality=1
 |  in pipelines: 05(GETNEXT), 00(OPEN)
 |
 04:HASH JOIN [INNER JOIN]
 |  hash predicates: a.int_col = c.int_col
 |  fk/pk conjuncts: none
 |  runtime filters: RF000[bloom] <- c.int_col, RF001[min_max] <- c.int_col
 |  mem-estimate=1.94MB mem-reservation=1.94MB spill-buffer=64.00KB thread-reservation=0
 |  tuple-ids=0,1,2 row-size=12B cardinality=5.33M
 |  in pipelines: 00(GETNEXT), 02(OPEN)
 |
 |--02:SCAN KUDU [functional_kudu.alltypes c]
 |     mem-estimate=768.00KB mem-reservation=0B thread-reservation=1
 |     tuple-ids=2 row-size=4B cardinality=7.30K
 |     in pipelines: 02(GETNEXT)
 |
 03:HASH JOIN [INNER JOIN]
 |  hash predicates: a.int_col = b.int_col
 |  fk/pk conjuncts: assumed fk/pk
 |  runtime filters: RF003[min_max] <- b.int_col
 |  mem-estimate=2.00GB mem-reservation=34.00MB spill-buffer=2.00MB thread-reservation=0
 |  tuple-ids=0,1 row-size=8B cardinality=7.30K
 |  in pipelines: 00(GETNEXT), 01(OPEN)
 |
 |--01:SCAN HDFS [functional_parquet.alltypes b]
 |     HDFS partitions=24/24 files=24 size=200.33KB
 |     runtime filters: RF000[bloom] -> b.int_col
 |     stored statistics:
 |       table: rows=unavailable size=unavailable
 |       partitions: 0/24 rows=unavailable
 |       columns: unavailable
 |     extrapolated-rows=disabled max-scan-range-rows=unavailable
 |     mem-estimate=16.00MB mem-reservation=16.00KB thread-reservation=1
 |     tuple-ids=1 row-size=4B cardinality=unavailable
 |     in pipelines: 01(GETNEXT)
 |
 00:SCAN KUDU [functional_kudu.alltypes a]
    runtime filters: RF001[min_max] -> a.int_col, RF003[min_max] -> a.int_col
    mem-estimate=768.00KB mem-reservation=0B thread-reservation=1
    tuple-ids=0 row-size=4B cardinality=7.30K
    in pipelines: 00(GETNEXT)
 ====
	# basic filter
	select count(*) from functional_kudu.alltypes a, functional_kudu.alltypestiny b
	where a.int_col = b.tinyint_col + 1 and a.string_col = b.string_col
	---- PLAN
	F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
	|  Per-Host Resources: mem-estimate=13.44MB mem-reservation=1.94MB thread-reservation=3
	PLAN-ROOT SINK
	|  output exprs: count(*)
	|  mem-estimate=0B mem-reservation=0B thread-reservation=0
	|
	03:AGGREGATE [FINALIZE]
	|  output: count(*)
	|  mem-estimate=10.00MB mem-reservation=0B spill-buffer=2.00MB thread-reservation=0
	|  tuple-ids=2 row-size=8B cardinality=1
	|  in pipelines: 03(GETNEXT), 00(OPEN)
	|
	02:HASH JOIN [INNER JOIN]
	|  hash predicates: a.string_col = b.string_col, a.int_col = b.tinyint_col + 1
	|  fk/pk conjuncts: none
	|  runtime filters: RF002[min_max] <- b.string_col, RF003[min_max] <- b.tinyint_col + 1
	|  mem-estimate=1.94MB mem-reservation=1.94MB spill-buffer=64.00KB thread-reservation=0
	|  tuple-ids=0,1 row-size=39B cardinality=5.84K
	|  in pipelines: 00(GETNEXT), 01(OPEN)
	|
	|--01:SCAN KUDU [functional_kudu.alltypestiny b]
	|     mem-estimate=1.50MB mem-reservation=0B thread-reservation=1
	|     tuple-ids=1 row-size=18B cardinality=8
	|     in pipelines: 01(GETNEXT)
	|
	00:SCAN KUDU [functional_kudu.alltypes a]
	   runtime filters: RF002[min_max] -> a.string_col, RF003[min_max] -> a.int_col
	   mem-estimate=1.50MB mem-reservation=0B thread-reservation=1
	   tuple-ids=0 row-size=21B cardinality=7.30K
	   in pipelines: 00(GETNEXT)
	====
	# Filters are not created if the target isn't a bare Kudu column or if 'is (not) distinct'
	# is used.
	select count(*) from functional_kudu.alltypes a, functional_kudu.alltypestiny b
	where a.int_col + 1 = b.int_col
	    and a.string_col is distinct from b.string_col
	    and a.tinyint_col is not distinct from b.tinyint_col
	---- PLAN
	F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
	|  Per-Host Resources: mem-estimate=14.19MB mem-reservation=1.94MB thread-reservation=3
	PLAN-ROOT SINK
	|  output exprs: count(*)
	|  mem-estimate=0B mem-reservation=0B thread-reservation=0
	|
	03:AGGREGATE [FINALIZE]
	|  output: count(*)
	|  mem-estimate=10.00MB mem-reservation=0B spill-buffer=2.00MB thread-reservation=0
	|  tuple-ids=2 row-size=8B cardinality=1
	|  in pipelines: 03(GETNEXT), 00(OPEN)
	|
	02:HASH JOIN [INNER JOIN]
	|  hash predicates: a.tinyint_col IS NOT DISTINCT FROM b.tinyint_col, a.int_col + 1 = b.int_col
	|  fk/pk conjuncts: assumed fk/pk
	|  other predicates: a.string_col IS DISTINCT FROM b.string_col
	|  mem-estimate=1.94MB mem-reservation=1.94MB spill-buffer=64.00KB thread-reservation=0
	|  tuple-ids=0,1 row-size=44B cardinality=7.30K
	|  in pipelines: 00(GETNEXT), 01(OPEN)
	|
	|--01:SCAN KUDU [functional_kudu.alltypestiny b]
	|     mem-estimate=2.25MB mem-reservation=0B thread-reservation=1
	|     tuple-ids=1 row-size=22B cardinality=8
	|     in pipelines: 01(GETNEXT)
	|
	00:SCAN KUDU [functional_kudu.alltypes a]
	   mem-estimate=2.25MB mem-reservation=0B thread-reservation=1
	   tuple-ids=0 row-size=22B cardinality=7.30K
	   in pipelines: 00(GETNEXT)
	====
	# Filters are only assigned when the target expr is cast if it is an implicit integer cast.
	select count(*) from functional_kudu.alltypes a, functional_kudu.alltypestiny b
	where a.tinyint_col = b.bigint_col
	    and cast(a.int_col as smallint) = b.smallint_col
	    and a.string_col = b.timestamp_col
	    and cast(a.float_col as double) = b.double_col
	---- PLAN
	F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
	|  Per-Host Resources: mem-estimate=14.94MB mem-reservation=1.94MB thread-reservation=3
	PLAN-ROOT SINK
	|  output exprs: count(*)
	|  mem-estimate=0B mem-reservation=0B thread-reservation=0
	|
	03:AGGREGATE [FINALIZE]
	|  output: count(*)
	|  mem-estimate=10.00MB mem-reservation=0B spill-buffer=2.00MB thread-reservation=0
	|  tuple-ids=2 row-size=8B cardinality=1
	|  in pipelines: 03(GETNEXT), 00(OPEN)
	|
	02:HASH JOIN [INNER JOIN]
	|  hash predicates: CAST(a.float_col AS DOUBLE) = b.double_col, CAST(a.int_col AS SMALLINT) = b.smallint_col, a.string_col = b.timestamp_col, a.tinyint_col = b.bigint_col
	|  fk/pk conjuncts: a.string_col = b.timestamp_col, a.tinyint_col = b.bigint_col
	|  runtime filters: RF007[min_max] <- b.bigint_col
	|  mem-estimate=1.94MB mem-reservation=1.94MB spill-buffer=64.00KB thread-reservation=0
	|  tuple-ids=0,1 row-size=60B cardinality=1.46K
	|  in pipelines: 00(GETNEXT), 01(OPEN)
	|
	|--01:SCAN KUDU [functional_kudu.alltypestiny b]
	|     mem-estimate=3.00MB mem-reservation=0B thread-reservation=1
	|     tuple-ids=1 row-size=34B cardinality=8
	|     in pipelines: 01(GETNEXT)
	|
	00:SCAN KUDU [functional_kudu.alltypes a]
	   runtime filters: RF007[min_max] -> a.tinyint_col
	   mem-estimate=3.00MB mem-reservation=0B thread-reservation=1
	   tuple-ids=0 row-size=26B cardinality=7.30K
	   in pipelines: 00(GETNEXT)
	====
	# Query with both Kudu and HDFS filter targets.
	select count(*) from functional_kudu.alltypes a, functional_parquet.alltypes b,
	    functional_kudu.alltypes c
	where a.int_col = b.int_col and a.int_col = c.int_col
	---- PLAN
	F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
	|  Per-Host Resources: mem-estimate=2.02GB mem-reservation=36.95MB thread-reservation=4 runtime-filters-memory=1.00MB
	PLAN-ROOT SINK
	|  output exprs: count(*)
	|  mem-estimate=0B mem-reservation=0B thread-reservation=0
	|
	05:AGGREGATE [FINALIZE]
	|  output: count(*)
	|  mem-estimate=10.00MB mem-reservation=0B spill-buffer=2.00MB thread-reservation=0
	|  tuple-ids=3 row-size=8B cardinality=1
	|  in pipelines: 05(GETNEXT), 00(OPEN)
	|
	04:HASH JOIN [INNER JOIN]
	|  hash predicates: a.int_col = c.int_col
	|  fk/pk conjuncts: none
	|  runtime filters: RF000[bloom] <- c.int_col, RF001[min_max] <- c.int_col
	|  mem-estimate=1.94MB mem-reservation=1.94MB spill-buffer=64.00KB thread-reservation=0
	|  tuple-ids=0,1,2 row-size=12B cardinality=5.33M
	|  in pipelines: 00(GETNEXT), 02(OPEN)
	|
	|--02:SCAN KUDU [functional_kudu.alltypes c]
	|     mem-estimate=768.00KB mem-reservation=0B thread-reservation=1
	|     tuple-ids=2 row-size=4B cardinality=7.30K
	|     in pipelines: 02(GETNEXT)
	|
	03:HASH JOIN [INNER JOIN]
	|  hash predicates: a.int_col = b.int_col
	|  fk/pk conjuncts: assumed fk/pk
	|  runtime filters: RF003[min_max] <- b.int_col
	|  mem-estimate=2.00GB mem-reservation=34.00MB spill-buffer=2.00MB thread-reservation=0
	|  tuple-ids=0,1 row-size=8B cardinality=7.30K
	|  in pipelines: 00(GETNEXT), 01(OPEN)
	|
	|--01:SCAN HDFS [functional_parquet.alltypes b]
	|     HDFS partitions=24/24 files=24 size=200.33KB
	|     runtime filters: RF000[bloom] -> b.int_col
	|     stored statistics:
	|       table: rows=unavailable size=unavailable
	|       partitions: 0/24 rows=unavailable
	|       columns: unavailable
	|     extrapolated-rows=disabled max-scan-range-rows=unavailable
	|     mem-estimate=16.00MB mem-reservation=16.00KB thread-reservation=1
	|     tuple-ids=1 row-size=4B cardinality=unavailable
	|     in pipelines: 01(GETNEXT)
	|
	00:SCAN KUDU [functional_kudu.alltypes a]
	   runtime filters: RF001[min_max] -> a.int_col, RF003[min_max] -> a.int_col
	   mem-estimate=768.00KB mem-reservation=0B thread-reservation=1
	   tuple-ids=0 row-size=4B cardinality=7.30K
	   in pipelines: 00(GETNEXT)
	====