| ==== |
| ---- QUERY |
| #################################################### |
| # Test case 1: broadcast join. |
| # Without filtering, expect 7300 / 3 = 2433 rows per scan fragment. |
| # With filtering, expect 618 / 3 = 206 rows to be read from 3 files (one per scan). |
| #################################################### |
| |
| # Basic filtering use case: filter p's partition columns thanks to an implicit |
| # relationship between join column and build-side predicates. |
| SET RUNTIME_FILTER_MODE=OFF; |
| select STRAIGHT_JOIN count(*) from alltypes p join [BROADCAST] alltypestiny b |
| on p.month = b.int_col and b.month = 1 and b.string_col = "1" |
| ---- RESULTS |
| 620 |
| ---- RUNTIME_PROFILE |
| row_regex: .*RowsRead: 2.43K .* |
| ==== |
| ---- QUERY |
| # Now turn on local filtering: we expect to see a reduction in scan volume. |
| SET RUNTIME_FILTER_WAIT_TIME_MS=30000; |
| SET RUNTIME_FILTER_MODE=LOCAL; |
| select STRAIGHT_JOIN count(*) from alltypes p join [BROADCAST] alltypestiny b |
| on p.month = b.int_col and b.month = 1 and b.string_col = "1" |
| ---- RESULTS |
| 620 |
| ---- RUNTIME_PROFILE |
| row_regex: .*Files rejected: 7 \(7\).* |
| ==== |
| |
| |
| ---- QUERY |
| #################################################### |
| # Test case 2: shuffle join - test for filter propagation (or lack thereof in LOCAL mode). |
| # Without filtering, expect 7300 / 3 = 2433 rows per scan fragment. |
| # With filtering, expect 618 / 3 = 206 rows to be read from 3 files (one per scan). |
| #################################################### |
| |
| # Local mode. Filters won't be propagated to scan, so scans will read all rows. |
| # Still give enough time for filters to show up (even if they won't) |
| SET RUNTIME_FILTER_WAIT_TIME_MS=30000; |
| SET RUNTIME_FILTER_MODE=LOCAL; |
| select STRAIGHT_JOIN count(*) from alltypes p join [SHUFFLE] alltypestiny b |
| on p.month = b.int_col and b.month = 1 and b.string_col = "1" |
| ---- RESULTS |
| 620 |
| ---- RUNTIME_PROFILE |
| row_regex: .*RowsRead: 2.43K .* |
| ==== |
| ---- QUERY |
| # Shuffle join, global mode. Expect filters to be propagated. |
| SET RUNTIME_FILTER_WAIT_TIME_MS=30000; |
| SET RUNTIME_FILTER_MODE=GLOBAL; |
| select STRAIGHT_JOIN count(*) from alltypes p join [SHUFFLE] alltypestiny b |
| on p.month = b.int_col and b.month = 1 and b.string_col = "1" |
| ---- RESULTS |
| 620 |
| ---- RUNTIME_PROFILE |
| row_regex: .*Files rejected: 7 \(7\).* |
| ==== |
| |
| |
| ---- QUERY |
| #################################################### |
| # Test case 3: two-hop filter chain with BROADCAST |
| # joins. |
| # Without filtering in left-most scan, expect 7300 / 3 = 2433 rows. |
| # With filtering, expect 0 rows as all files are rejected by the partition |
| # column filter. |
| #################################################### |
| |
| # Local mode. Only the left-most scan will receive its filter, but since the scan of 'b' |
| # will not, the lack of predicates means there is no filter effect. |
| SET RUNTIME_FILTER_WAIT_TIME_MS=30000; |
| SET RUNTIME_FILTER_MODE=LOCAL; |
| select STRAIGHT_JOIN count(*) from alltypes a |
| join [BROADCAST] alltypes b |
| join [BROADCAST] alltypestiny c |
| where c.month = 13 and b.year = c.year and a.month = b.month |
| ---- RESULTS |
| 0 |
| ---- RUNTIME_PROFILE |
| row_regex: .*Files rejected: 0 .* |
| ==== |
| ---- QUERY |
| # Global mode. Scan of 'b' will receive highly effective filter, and will propagate that |
| # to left-most scan. |
| SET RUNTIME_FILTER_WAIT_TIME_MS=30000; |
| SET RUNTIME_FILTER_MODE=GLOBAL; |
| select STRAIGHT_JOIN count(*) from alltypes a |
| join [BROADCAST] alltypes b |
| join [BROADCAST] alltypestiny c |
| where c.month = 13 and b.year = c.year and a.month = b.month |
| ---- RESULTS |
| 0 |
| ---- RUNTIME_PROFILE |
| row_regex: .*Files rejected: 8 .* |
| ==== |
| |
| |
| ---- QUERY |
| #################################################### |
| # Test case 4: complex filter expressions. The join predicate matches nothing, but |
| # isn't simplified by the planner before execution. |
| # With local filtering, expect 0 rows, as all are rejected by partition pruning. |
| #################################################### |
| |
| SET RUNTIME_FILTER_WAIT_TIME_MS=30000; |
| SET RUNTIME_FILTER_MODE=LOCAL; |
| select STRAIGHT_JOIN count(*) from alltypes a |
| join [BROADCAST] alltypestiny b |
| on greatest(a.month, 1000) = greatest(b.month, 2000) |
| ---- RESULTS |
| 0 |
| ---- RUNTIME_PROFILE |
| row_regex: .*RowsRead: 0 .* |
| row_regex: .*Files rejected: 8 .* |
| ==== |
| |
| |
| ---- QUERY |
| #################################################### |
| # Test case 5: filters with local target don't get broadcast. |
| #################################################### |
| |
| # Local mode. Coordinator should report 0 filter updates received. |
| SET RUNTIME_FILTER_WAIT_TIME_MS=30000; |
| SET RUNTIME_FILTER_MODE=LOCAL; |
| select STRAIGHT_JOIN count(*) from alltypes a |
| join [BROADCAST] alltypestiny b |
| on a.month = b.month + 10000; |
| ---- RESULTS |
| 0 |
| ---- RUNTIME_PROFILE |
| row_regex: .*FiltersReceived: 0 .* |
| ==== |
| ---- QUERY |
| # Global mode. Coordinator should report 0 filter updates received. |
| SET RUNTIME_FILTER_WAIT_TIME_MS=30000; |
| SET RUNTIME_FILTER_MODE=GLOBAL; |
| select STRAIGHT_JOIN count(*) from alltypes a |
| join [BROADCAST] alltypestiny b |
| on a.month = b.month + 10000; |
| ---- RESULTS |
| 0 |
| ---- RUNTIME_PROFILE |
| row_regex: .*FiltersReceived: 0 .* |
| row_regex: .*Files rejected: 8 .* |
| ==== |
| |
| |
| ---- QUERY |
| #################################################### |
| # Test case 6: filters with non-local target get broadcast in GLOBAL mode only. |
| #################################################### |
| |
| # Local mode. Coordinator should report 0 filter updates received. |
| SET RUNTIME_FILTER_WAIT_TIME_MS=30000; |
| SET RUNTIME_FILTER_MODE=LOCAL; |
| select STRAIGHT_JOIN count(*) from alltypes a |
| join [SHUFFLE] alltypestiny b |
| on a.month = b.month + 10000; |
| ---- RESULTS |
| 0 |
| ---- RUNTIME_PROFILE |
| row_regex: .*FiltersReceived: 0 .* |
| ==== |
| ---- QUERY |
| # Global mode. Coordinator should report 1 filter update per backend. |
| SET RUNTIME_FILTER_WAIT_TIME_MS=30000; |
| SET RUNTIME_FILTER_MODE=GLOBAL; |
| select STRAIGHT_JOIN count(*) from alltypes a |
| join [SHUFFLE] alltypestiny b |
| on a.month = b.month + 10000; |
| ---- RESULTS |
| 0 |
| ---- RUNTIME_PROFILE |
| row_regex: .*FiltersReceived: 3 .* |
| ==== |
| |
| |
| ---- QUERY |
| #################################################### |
| # Test case 7: filters with target exprs bound by > 1 slotref. |
| # Expect all but one partition to be filtered out by join expr. |
| #################################################### |
| |
| SET RUNTIME_FILTER_WAIT_TIME_MS=30000; |
| SET RUNTIME_FILTER_MODE=LOCAL; |
| select STRAIGHT_JOIN count(*) from alltypes a |
| join [BROADCAST] alltypestiny b |
| on a.month + a.year = b.year + 1; |
| ---- RESULTS |
| 2480 |
| ---- RUNTIME_PROFILE |
| row_regex: .*Files rejected: 7 .* |
| ==== |
| |
| |
| ---- QUERY |
| #################################################### |
| # Test case 8: filters do not pass through LOJ. |
| #################################################### |
| |
| SET RUNTIME_FILTER_WAIT_TIME_MS=30000; |
| SET RUNTIME_FILTER_MODE=GLOBAL; |
| select STRAIGHT_JOIN count(*) from alltypes a |
| LEFT OUTER join alltypestiny b |
| on a.month = b.year |
| ---- RESULTS |
| 7300 |
| ---- RUNTIME_PROFILE |
| row_regex: .*RowsReturned: 2.43K .* |
| ==== |
| |
| |
| ---- QUERY |
| #################################################### |
| # Test case 9: filters do pass through ROJ. |
| # All partitions will be filtered out by the join condition. |
| #################################################### |
| |
| SET RUNTIME_FILTER_WAIT_TIME_MS=30000; |
| SET RUNTIME_FILTER_MODE=GLOBAL; |
| select STRAIGHT_JOIN count(*) from alltypes a |
| RIGHT OUTER join alltypestiny b |
| on a.month = b.year |
| ---- RESULTS |
| 8 |
| ---- RUNTIME_PROFILE |
| row_regex: .*Files rejected: 8 .* |
| ==== |
| |
| ---- QUERY |
| #################################################### |
| # Test case 10: filters do not pass through FOJ. |
| #################################################### |
| |
| SET RUNTIME_FILTER_WAIT_TIME_MS=30000; |
| SET RUNTIME_FILTER_MODE=GLOBAL; |
| select STRAIGHT_JOIN count(*) from alltypes a |
| FULL OUTER join alltypestiny b |
| on a.month = b.year |
| ---- RESULTS |
| 7308 |
| ---- RUNTIME_PROFILE |
| row_regex: .*RowsReturned: 2.43K .* |
| ==== |
| |
| |
| ---- QUERY |
| #################################################### |
| # Test case 11: filters with high expected FP rate get disabled. |
| # To trigger this path, we have to trick the planner into estimating a too-small |
| # build-side cardinality, which will cause the BF size to be estimated low (and therefore |
| # the FP rate to be high). We do this by using predicates that are completely unselective, |
| # but which the planner thinks have relatively high selectivity. |
| #################################################### |
| |
| SET RUNTIME_FILTER_WAIT_TIME_MS=30000; |
| SET RUNTIME_FILTER_MODE=GLOBAL; |
| SET RUNTIME_FILTER_MAX_SIZE=4K; |
| select STRAIGHT_JOIN count(*) from alltypes a |
| join [BROADCAST] |
| # Build-side needs to be sufficiently large to trigger FP check. |
| (select id, int_col from alltypes UNION ALL select id, int_col from alltypes) b |
| on a.id = b.id |
| # Predicates that are always true (but planner thinks are selective) |
| where (b.id - b.id) < 1 AND (b.int_col - b.int_col) < 1; |
| ---- RESULTS |
| 14600 |
| ---- RUNTIME_PROFILE |
| row_regex: .*0 of 1 Runtime Filter Published, 1 Disabled.* |
| row_regex: .*Rows rejected: 0 .* |
| ==== |
| |
| |
| ---- QUERY |
| #################################################### |
| # Test case 12: join predicates with NULL values. |
| # Build-side selects one row from alltypes agg where day IS NULL, and joins with all rows |
| # in probe side with day IS NULL. |
| # Expect with filtering that 1K rows are returned, with an average of 333 per scan node |
| # per fragment instance, and three files rejected per scan. |
| #################################################### |
| |
| SET RUNTIME_FILTER_WAIT_TIME_MS=30000; |
| SET RUNTIME_FILTER_MODE=GLOBAL; |
| select STRAIGHT_JOIN count(*) from alltypesagg a |
| join alltypesagg b |
| on a.day <=> b.day |
| where b.int_col IS NULL and b.id = 0 and b.day IS DISTINCT FROM 1 |
| ---- RESULTS |
| 1000 |
| ---- RUNTIME_PROFILE |
| row_regex: .*Files rejected: 3 .* |
| ==== |
| |
| |
| ---- QUERY |
| #################################################### |
| # Test case 13: coordinator fragment produces filters |
| # In this esoteric query plan, the coordinator fragment has a hash |
| # join in its root, which produces filters for the scan of t1. |
| #################################################### |
| |
| set RUNTIME_FILTER_WAIT_TIME_MS=30000; |
| set RUNTIME_FILTER_MODE=GLOBAL; |
| with t1 as (select month x, bigint_col y from alltypes limit 7300), |
| t2 as (select int_col x, bigint_col y from alltypestiny limit 2) |
| select count(*) from t1, t2 where t1.x = t2.x |
| ---- RESULTS |
| 620 |
| ---- RUNTIME_PROFILE |
| row_regex: .*Files rejected: 7 .* |
| ==== |
| |
| |
| ---- QUERY |
| #################################################### |
| # Test case 14: When NUM_NODES=1, all filters should be local. |
| # Regression test for IMPALA-3245. |
| #################################################### |
| set NUM_NODES=1; |
| set RUNTIME_FILTER_MODE=GLOBAL; |
| select STRAIGHT_JOIN count(a.id) from alltypes a |
| join [SHUFFLE] alltypes b on a.id = b.id; |
| ---- RESULTS |
| 7300 |
| ==== |
| |
| |
| ---- QUERY |
| #################################################### |
| # Test case 15: Filter sizes change according to their NDV |
| #################################################### |
| SET RUNTIME_FILTER_MODE=GLOBAL; |
| SET RUNTIME_FILTER_WAIT_TIME_MS=30000; |
| SET RUNTIME_FILTER_MIN_SIZE=4KB; |
| with l as (select * from tpch.lineitem UNION ALL select * from tpch.lineitem) |
| select STRAIGHT_JOIN count(*) from (select * from tpch.lineitem a LIMIT 1) a |
| join (select * from l LIMIT 1) b on a.l_orderkey = -b.l_orderkey; |
| ---- RESULTS |
| 0 |
| ---- RUNTIME_PROFILE |
| row_regex: .*1 of 1 Runtime Filter Published.* |
| row_regex: .*Filter 0 \(4.00 KB\).* |
| ==== |
| ---- QUERY |
| SET RUNTIME_FILTER_MODE=GLOBAL; |
| SET RUNTIME_FILTER_WAIT_TIME_MS=30000; |
| SET RUNTIME_FILTER_MIN_SIZE=4KB; |
| with l as (select * from tpch.lineitem UNION ALL select * from tpch.lineitem) |
| select STRAIGHT_JOIN count(*) from (select * from tpch.lineitem a LIMIT 1) a |
| join (select * from l LIMIT 500000) b on a.l_orderkey = -b.l_orderkey; |
| ---- RESULTS |
| 0 |
| ---- RUNTIME_PROFILE |
| row_regex: .*1 of 1 Runtime Filter Published.* |
| row_regex: .*Filter 0 \(256.00 KB\).* |
| ==== |
| ---- QUERY |
| SET RUNTIME_FILTER_MODE=GLOBAL; |
| SET RUNTIME_FILTER_WAIT_TIME_MS=30000; |
| SET RUNTIME_FILTER_MIN_SIZE=4KB; |
| with l as (select * from tpch.lineitem UNION ALL select * from tpch.lineitem) |
| select STRAIGHT_JOIN count(*) from (select * from tpch.lineitem a LIMIT 1) a |
| join (select * from l LIMIT 1000000) b on a.l_orderkey = -b.l_orderkey; |
| ---- RESULTS |
| 0 |
| ---- RUNTIME_PROFILE |
| row_regex: .*1 of 1 Runtime Filter Published.* |
| row_regex: .*Filter 0 \(512.00 KB\).* |
| ==== |
| ---- QUERY |
| SET RUNTIME_FILTER_MODE=GLOBAL; |
| SET RUNTIME_FILTER_WAIT_TIME_MS=30000; |
| SET RUNTIME_FILTER_MIN_SIZE=4KB; |
| with l as (select * from tpch.lineitem UNION ALL select * from tpch.lineitem) |
| select STRAIGHT_JOIN count(*) from (select * from tpch.lineitem a LIMIT 1) a |
| join (select * from l LIMIT 2000000) b on a.l_orderkey = -b.l_orderkey; |
| ---- RESULTS |
| 0 |
| ---- RUNTIME_PROFILE |
| row_regex: .*1 of 1 Runtime Filter Published.* |
| row_regex: .*Filter 0 \(1.00 MB\).* |
| ==== |
| |
| |
| ---- QUERY |
| #################################################### |
| # Test case 16: Filter sizes respect query options |
| #################################################### |
| SET RUNTIME_FILTER_MODE=GLOBAL; |
| SET RUNTIME_FILTER_WAIT_TIME_MS=30000; |
| SET RUNTIME_FILTER_MIN_SIZE=8KB; |
| SET RUNTIME_FILTER_MAX_SIZE=8KB; |
| # This query would produce a 4KB filter without setting the minimum size. |
| select STRAIGHT_JOIN count(*) from alltypes a join [SHUFFLE] alltypes b on a.id = b.id; |
| ---- RESULTS |
| 7300 |
| ---- RUNTIME_PROFILE |
| row_regex: .*1 of 1 Runtime Filter Published.* |
| row_regex: .*Filter 0 \(8.00 KB\).* |
| ==== |
| ---- QUERY |
| # Check that filter sizes are rounded up to power-of-two |
| SET RUNTIME_FILTER_MIN_SIZE=6000B; |
| SET RUNTIME_FILTER_MAX_SIZE=6000B; |
| select STRAIGHT_JOIN count(*) from alltypes a join [SHUFFLE] alltypes b on a.id = b.id; |
| ---- RESULTS |
| 7300 |
| ---- RUNTIME_PROFILE |
| row_regex: .*1 of 1 Runtime Filter Published.* |
| row_regex: .*Filter 0 \(8.00 KB\).* |
| ==== |
| ---- QUERY |
| SET RUNTIME_FILTER_MODE=GLOBAL; |
| SET RUNTIME_FILTER_WAIT_TIME_MS=30000; |
| SET RUNTIME_FILTER_MAX_SIZE=8192; |
| # Query would produce a 512KB filter without setting the max |
| with l as (select * from tpch.lineitem UNION ALL select * from tpch.lineitem) |
| select STRAIGHT_JOIN count(*) from (select * from tpch.lineitem a LIMIT 1) a |
| join (select * from l LIMIT 1000000) b on a.l_orderkey = -b.l_orderkey; |
| ---- RUNTIME_PROFILE |
| row_regex: .*0 of 1 Runtime Filter Published.* |
| row_regex: .*Filter 0 \(8.00 KB\).* |
| ==== |
| |
| |
| ---- QUERY |
| #################################################### |
| # Test case 17: Filter with two targers (one local, one remote) |
| # In this three-way join the filter produced by the top-level |
| # join has both a local and a remote target. |
| #################################################### |
| set RUNTIME_FILTER_WAIT_TIME_MS=30000; |
| set RUNTIME_FILTER_MODE=GLOBAL; |
| select straight_join count(*) |
| from alltypes a join [BROADCAST] alltypessmall c |
| on a.month = c.month join [BROADCAST] alltypesagg b |
| on a.month = b.id where b.int_col < 0; |
| ---- RESULTS |
| 0 |
| ==== |
| |
| |
| ---- QUERY |
| #################################################### |
| # Test case 18: Runtime filter pushed to all union operands |
| #################################################### |
| set RUNTIME_FILTER_WAIT_TIME_MS=30000; |
| set RUNTIME_FILTER_MODE=GLOBAL; |
| select straight_join count(*) |
| from (select month, year from alltypes |
| union all |
| select month, year from alltypes) a |
| join alltypessmall b on a.month = b.month |
| where b.int_col = 1; |
| ---- RESULTS |
| 14400 |
| ==== |