blob: 1bc9b1279c1ccfc2faa14d5718f17e7c90e07473 [file] [log] [blame]
====
---- QUERY
####################################################
# Test case 1: broadcast join.
# Without filtering, expect 7300 / 3 = 2433 rows per scan fragment.
# With filtering, expect 618 / 3 = 206 rows to be read from 3 files (one per scan).
####################################################
# Basic filtering use case: filter p's partition columns thanks to an implicit
# relationship between join column and build-side predicates.
SET RUNTIME_FILTER_MODE=OFF;
select STRAIGHT_JOIN count(*) from alltypes p join [BROADCAST] alltypestiny b
on p.month = b.int_col and b.month = 1 and b.string_col = "1"
---- RESULTS
620
---- RUNTIME_PROFILE
row_regex: .*RowsRead: 2.43K .*
====
---- QUERY
# Now turn on local filtering: we expect to see a reduction in scan volume.
SET RUNTIME_FILTER_WAIT_TIME_MS=30000;
SET RUNTIME_FILTER_MODE=LOCAL;
select STRAIGHT_JOIN count(*) from alltypes p join [BROADCAST] alltypestiny b
on p.month = b.int_col and b.month = 1 and b.string_col = "1"
---- RESULTS
620
---- RUNTIME_PROFILE
row_regex: .*Files rejected: 7 \(7\).*
====
---- QUERY
####################################################
# Test case 2: shuffle join - test for filter propagation (or lack thereof in LOCAL mode).
# Without filtering, expect 7300 / 3 = 2433 rows per scan fragment.
# With filtering, expect 618 / 3 = 206 rows to be read from 3 files (one per scan).
####################################################
# Local mode. Filters won't be propagated to scan, so scans will read all rows.
# Still give enough time for filters to show up (even if they won't)
SET RUNTIME_FILTER_WAIT_TIME_MS=30000;
SET RUNTIME_FILTER_MODE=LOCAL;
select STRAIGHT_JOIN count(*) from alltypes p join [SHUFFLE] alltypestiny b
on p.month = b.int_col and b.month = 1 and b.string_col = "1"
---- RESULTS
620
---- RUNTIME_PROFILE
row_regex: .*RowsRead: 2.43K .*
====
---- QUERY
# Shuffle join, global mode. Expect filters to be propagated.
SET RUNTIME_FILTER_WAIT_TIME_MS=30000;
SET RUNTIME_FILTER_MODE=GLOBAL;
select STRAIGHT_JOIN count(*) from alltypes p join [SHUFFLE] alltypestiny b
on p.month = b.int_col and b.month = 1 and b.string_col = "1"
---- RESULTS
620
---- RUNTIME_PROFILE
row_regex: .*Files rejected: 7 \(7\).*
====
---- QUERY
####################################################
# Test case 3: two-hop filter chain with BROADCAST
# joins.
# Without filtering in left-most scan, expect 7300 / 3 = 2433 rows.
# With filtering, expect 0 rows as all files are rejected by the partition
# column filter.
####################################################
# Local mode. Only the left-most scan will receive its filter, but since the scan of 'b'
# will not, the lack of predicates means there is no filter effect.
SET RUNTIME_FILTER_WAIT_TIME_MS=30000;
SET RUNTIME_FILTER_MODE=LOCAL;
select STRAIGHT_JOIN count(*) from alltypes a
join [BROADCAST] alltypes b
join [BROADCAST] alltypestiny c
where c.month = 13 and b.year = c.year and a.month = b.month
---- RESULTS
0
---- RUNTIME_PROFILE
row_regex: .*Files rejected: 0 .*
====
---- QUERY
# Global mode. Scan of 'b' will receive highly effective filter, and will propagate that
# to left-most scan.
SET RUNTIME_FILTER_WAIT_TIME_MS=30000;
SET RUNTIME_FILTER_MODE=GLOBAL;
select STRAIGHT_JOIN count(*) from alltypes a
join [BROADCAST] alltypes b
join [BROADCAST] alltypestiny c
where c.month = 13 and b.year = c.year and a.month = b.month
---- RESULTS
0
---- RUNTIME_PROFILE
row_regex: .*Files rejected: 8 .*
====
---- QUERY
####################################################
# Test case 4: complex filter expressions. The join predicate matches nothing, but
# isn't simplified by the planner before execution.
# With local filtering, expect 0 rows, as all are rejected by partition pruning.
####################################################
SET RUNTIME_FILTER_WAIT_TIME_MS=30000;
SET RUNTIME_FILTER_MODE=LOCAL;
select STRAIGHT_JOIN count(*) from alltypes a
join [BROADCAST] alltypestiny b
on greatest(a.month, 1000) = greatest(b.month, 2000)
---- RESULTS
0
---- RUNTIME_PROFILE
row_regex: .*RowsRead: 0 .*
row_regex: .*Files rejected: 8 .*
====
---- QUERY
####################################################
# Test case 5: filters with local target don't get broadcast.
####################################################
# Local mode. Coordinator should report 0 filter updates received.
SET RUNTIME_FILTER_WAIT_TIME_MS=30000;
SET RUNTIME_FILTER_MODE=LOCAL;
select STRAIGHT_JOIN count(*) from alltypes a
join [BROADCAST] alltypestiny b
on a.month = b.month + 10000;
---- RESULTS
0
---- RUNTIME_PROFILE
row_regex: .*FiltersReceived: 0 .*
====
---- QUERY
# Global mode. Coordinator should report 0 filter updates received.
SET RUNTIME_FILTER_WAIT_TIME_MS=30000;
SET RUNTIME_FILTER_MODE=GLOBAL;
select STRAIGHT_JOIN count(*) from alltypes a
join [BROADCAST] alltypestiny b
on a.month = b.month + 10000;
---- RESULTS
0
---- RUNTIME_PROFILE
row_regex: .*FiltersReceived: 0 .*
row_regex: .*Files rejected: 8 .*
====
---- QUERY
####################################################
# Test case 6: filters with non-local target get broadcast in GLOBAL mode only.
####################################################
# Local mode. Coordinator should report 0 filter updates received.
SET RUNTIME_FILTER_WAIT_TIME_MS=30000;
SET RUNTIME_FILTER_MODE=LOCAL;
select STRAIGHT_JOIN count(*) from alltypes a
join [SHUFFLE] alltypestiny b
on a.month = b.month + 10000;
---- RESULTS
0
---- RUNTIME_PROFILE
row_regex: .*FiltersReceived: 0 .*
====
---- QUERY
# Global mode. Coordinator should report 1 filter update per backend.
SET RUNTIME_FILTER_WAIT_TIME_MS=30000;
SET RUNTIME_FILTER_MODE=GLOBAL;
select STRAIGHT_JOIN count(*) from alltypes a
join [SHUFFLE] alltypestiny b
on a.month = b.month + 10000;
---- RESULTS
0
---- RUNTIME_PROFILE
row_regex: .*FiltersReceived: 3 .*
====
---- QUERY
####################################################
# Test case 7: filters with target exprs bound by > 1 slotref.
# Expect all but one partition to be filtered out by join expr.
####################################################
SET RUNTIME_FILTER_WAIT_TIME_MS=30000;
SET RUNTIME_FILTER_MODE=LOCAL;
select STRAIGHT_JOIN count(*) from alltypes a
join [BROADCAST] alltypestiny b
on a.month + a.year = b.year + 1;
---- RESULTS
2480
---- RUNTIME_PROFILE
row_regex: .*Files rejected: 7 .*
====
---- QUERY
####################################################
# Test case 8: filters do not pass through LOJ.
####################################################
SET RUNTIME_FILTER_WAIT_TIME_MS=30000;
SET RUNTIME_FILTER_MODE=GLOBAL;
select STRAIGHT_JOIN count(*) from alltypes a
LEFT OUTER join alltypestiny b
on a.month = b.year
---- RESULTS
7300
---- RUNTIME_PROFILE
row_regex: .*RowsReturned: 2.43K .*
====
---- QUERY
####################################################
# Test case 9: filters do pass through ROJ.
# All partitions will be filtered out by the join condition.
####################################################
SET RUNTIME_FILTER_WAIT_TIME_MS=30000;
SET RUNTIME_FILTER_MODE=GLOBAL;
select STRAIGHT_JOIN count(*) from alltypes a
RIGHT OUTER join alltypestiny b
on a.month = b.year
---- RESULTS
8
---- RUNTIME_PROFILE
row_regex: .*Files rejected: 8 .*
====
---- QUERY
####################################################
# Test case 10: filters do not pass through FOJ.
####################################################
SET RUNTIME_FILTER_WAIT_TIME_MS=30000;
SET RUNTIME_FILTER_MODE=GLOBAL;
select STRAIGHT_JOIN count(*) from alltypes a
FULL OUTER join alltypestiny b
on a.month = b.year
---- RESULTS
7308
---- RUNTIME_PROFILE
row_regex: .*RowsReturned: 2.43K .*
====
---- QUERY
####################################################
# Test case 11: filters with high expected FP rate get disabled.
# To trigger this path, we have to trick the planner into estimating a too-small
# build-side cardinality, which will cause the BF size to be estimated low (and therefore
# the FP rate to be high). We do this by using predicates that are completely unselective,
# but which the planner thinks have relatively high selectivity.
####################################################
SET RUNTIME_FILTER_WAIT_TIME_MS=30000;
SET RUNTIME_FILTER_MODE=GLOBAL;
SET RUNTIME_FILTER_MAX_SIZE=4K;
select STRAIGHT_JOIN count(*) from alltypes a
join [BROADCAST]
# Build-side needs to be sufficiently large to trigger FP check.
(select id, int_col from alltypes UNION ALL select id, int_col from alltypes) b
on a.id = b.id
# Predicates that are always true (but planner thinks are selective)
where (b.id - b.id) < 1 AND (b.int_col - b.int_col) < 1;
---- RESULTS
14600
---- RUNTIME_PROFILE
row_regex: .*0 of 1 Runtime Filter Published, 1 Disabled.*
row_regex: .*Rows rejected: 0 .*
====
---- QUERY
####################################################
# Test case 12: join predicates with NULL values.
# Build-side selects one row from alltypes agg where day IS NULL, and joins with all rows
# in probe side with day IS NULL.
# Expect with filtering that 1K rows are returned, with an average of 333 per scan node
# per fragment instance, and three files rejected per scan.
####################################################
SET RUNTIME_FILTER_WAIT_TIME_MS=30000;
SET RUNTIME_FILTER_MODE=GLOBAL;
select STRAIGHT_JOIN count(*) from alltypesagg a
join alltypesagg b
on a.day <=> b.day
where b.int_col IS NULL and b.id = 0 and b.day IS DISTINCT FROM 1
---- RESULTS
1000
---- RUNTIME_PROFILE
row_regex: .*Files rejected: 3 .*
====
---- QUERY
####################################################
# Test case 13: coordinator fragment produces filters
# In this esoteric query plan, the coordinator fragment has a hash
# join in its root, which produces filters for the scan of t1.
####################################################
set RUNTIME_FILTER_WAIT_TIME_MS=30000;
set RUNTIME_FILTER_MODE=GLOBAL;
with t1 as (select month x, bigint_col y from alltypes limit 7300),
t2 as (select int_col x, bigint_col y from alltypestiny limit 2)
select count(*) from t1, t2 where t1.x = t2.x
---- RESULTS
620
---- RUNTIME_PROFILE
row_regex: .*Files rejected: 7 .*
====
---- QUERY
####################################################
# Test case 14: When NUM_NODES=1, all filters should be local.
# Regression test for IMPALA-3245.
####################################################
set NUM_NODES=1;
set RUNTIME_FILTER_MODE=GLOBAL;
select STRAIGHT_JOIN count(a.id) from alltypes a
join [SHUFFLE] alltypes b on a.id = b.id;
---- RESULTS
7300
====
---- QUERY
####################################################
# Test case 15: Filter sizes change according to their NDV
####################################################
SET RUNTIME_FILTER_MODE=GLOBAL;
SET RUNTIME_FILTER_WAIT_TIME_MS=30000;
SET RUNTIME_FILTER_MIN_SIZE=4KB;
with l as (select * from tpch.lineitem UNION ALL select * from tpch.lineitem)
select STRAIGHT_JOIN count(*) from (select * from tpch.lineitem a LIMIT 1) a
join (select * from l LIMIT 1) b on a.l_orderkey = -b.l_orderkey;
---- RESULTS
0
---- RUNTIME_PROFILE
row_regex: .*1 of 1 Runtime Filter Published.*
row_regex: .*Filter 0 \(4.00 KB\).*
====
---- QUERY
SET RUNTIME_FILTER_MODE=GLOBAL;
SET RUNTIME_FILTER_WAIT_TIME_MS=30000;
SET RUNTIME_FILTER_MIN_SIZE=4KB;
with l as (select * from tpch.lineitem UNION ALL select * from tpch.lineitem)
select STRAIGHT_JOIN count(*) from (select * from tpch.lineitem a LIMIT 1) a
join (select * from l LIMIT 500000) b on a.l_orderkey = -b.l_orderkey;
---- RESULTS
0
---- RUNTIME_PROFILE
row_regex: .*1 of 1 Runtime Filter Published.*
row_regex: .*Filter 0 \(256.00 KB\).*
====
---- QUERY
SET RUNTIME_FILTER_MODE=GLOBAL;
SET RUNTIME_FILTER_WAIT_TIME_MS=30000;
SET RUNTIME_FILTER_MIN_SIZE=4KB;
with l as (select * from tpch.lineitem UNION ALL select * from tpch.lineitem)
select STRAIGHT_JOIN count(*) from (select * from tpch.lineitem a LIMIT 1) a
join (select * from l LIMIT 1000000) b on a.l_orderkey = -b.l_orderkey;
---- RESULTS
0
---- RUNTIME_PROFILE
row_regex: .*1 of 1 Runtime Filter Published.*
row_regex: .*Filter 0 \(512.00 KB\).*
====
---- QUERY
SET RUNTIME_FILTER_MODE=GLOBAL;
SET RUNTIME_FILTER_WAIT_TIME_MS=30000;
SET RUNTIME_FILTER_MIN_SIZE=4KB;
with l as (select * from tpch.lineitem UNION ALL select * from tpch.lineitem)
select STRAIGHT_JOIN count(*) from (select * from tpch.lineitem a LIMIT 1) a
join (select * from l LIMIT 2000000) b on a.l_orderkey = -b.l_orderkey;
---- RESULTS
0
---- RUNTIME_PROFILE
row_regex: .*1 of 1 Runtime Filter Published.*
row_regex: .*Filter 0 \(1.00 MB\).*
====
---- QUERY
####################################################
# Test case 16: Filter sizes respect query options
####################################################
SET RUNTIME_FILTER_MODE=GLOBAL;
SET RUNTIME_FILTER_WAIT_TIME_MS=30000;
SET RUNTIME_FILTER_MIN_SIZE=8KB;
SET RUNTIME_FILTER_MAX_SIZE=8KB;
# This query would produce a 4KB filter without setting the minimum size.
select STRAIGHT_JOIN count(*) from alltypes a join [SHUFFLE] alltypes b on a.id = b.id;
---- RESULTS
7300
---- RUNTIME_PROFILE
row_regex: .*1 of 1 Runtime Filter Published.*
row_regex: .*Filter 0 \(8.00 KB\).*
====
---- QUERY
# Check that filter sizes are rounded up to power-of-two
SET RUNTIME_FILTER_MIN_SIZE=6000B;
SET RUNTIME_FILTER_MAX_SIZE=6000B;
select STRAIGHT_JOIN count(*) from alltypes a join [SHUFFLE] alltypes b on a.id = b.id;
---- RESULTS
7300
---- RUNTIME_PROFILE
row_regex: .*1 of 1 Runtime Filter Published.*
row_regex: .*Filter 0 \(8.00 KB\).*
====
---- QUERY
SET RUNTIME_FILTER_MODE=GLOBAL;
SET RUNTIME_FILTER_WAIT_TIME_MS=30000;
SET RUNTIME_FILTER_MAX_SIZE=8192;
# Query would produce a 512KB filter without setting the max
with l as (select * from tpch.lineitem UNION ALL select * from tpch.lineitem)
select STRAIGHT_JOIN count(*) from (select * from tpch.lineitem a LIMIT 1) a
join (select * from l LIMIT 1000000) b on a.l_orderkey = -b.l_orderkey;
---- RUNTIME_PROFILE
row_regex: .*0 of 1 Runtime Filter Published.*
row_regex: .*Filter 0 \(8.00 KB\).*
====
---- QUERY
####################################################
# Test case 17: Filter with two targers (one local, one remote)
# In this three-way join the filter produced by the top-level
# join has both a local and a remote target.
####################################################
set RUNTIME_FILTER_WAIT_TIME_MS=30000;
set RUNTIME_FILTER_MODE=GLOBAL;
select straight_join count(*)
from alltypes a join [BROADCAST] alltypessmall c
on a.month = c.month join [BROADCAST] alltypesagg b
on a.month = b.id where b.int_col < 0;
---- RESULTS
0
====
---- QUERY
####################################################
# Test case 18: Runtime filter pushed to all union operands
####################################################
set RUNTIME_FILTER_WAIT_TIME_MS=30000;
set RUNTIME_FILTER_MODE=GLOBAL;
select straight_join count(*)
from (select month, year from alltypes
union all
select month, year from alltypes) a
join alltypessmall b on a.month = b.month
where b.int_col = 1;
---- RESULTS
14400
====