blob: 0f877767f725618d5ceec9323c5aebb952dbd979 [file] [log] [blame]
====
---- QUERY
####################################################
# Test case 1: broadcast join.
# Without filtering, expect 7300 / 3 = 2433 rows per scan fragment.
# With filtering, expect 618 / 3 = 206 rows to be read from 3 files (one per scan).
####################################################
# Basic filtering use case: filter p's partition columns thanks to an implicit
# relationship between join column and build-side predicates.
SET RUNTIME_FILTER_MODE=OFF;
select STRAIGHT_JOIN count(*) from alltypes p join [BROADCAST] alltypestiny b
on p.month = b.int_col and b.month = 1 and b.string_col = "1"
---- RESULTS
620
---- RUNTIME_PROFILE
row_regex: .*RowsRead: 2.43K .*
====
---- QUERY
# Now turn on local filtering: we expect to see a reduction in scan volume.
# TODO: improve the Kudu profile output once KUDU-2162 is fixed.
SET RUNTIME_FILTER_WAIT_TIME_MS=$RUNTIME_FILTER_WAIT_TIME_MS;
SET RUNTIME_FILTER_MODE=LOCAL;
select STRAIGHT_JOIN count(*) from alltypes p join [BROADCAST] alltypestiny b
on p.month = b.int_col and b.month = 1 and b.string_col = "1"
---- RESULTS
620
---- RUNTIME_PROFILE
row_regex: .*Files rejected: 7 \(7\).*
---- RUNTIME_PROFILE: table_format=kudu
row_regex: .*RowsRead: 206 .*
====
---- QUERY
####################################################
# Test case 2: shuffle join - test for filter propagation (or lack thereof in LOCAL mode).
# Without filtering, expect 7300 / 3 = 2433 rows per scan fragment.
# With filtering, expect 618 / 3 = 206 rows to be read from 3 files (one per scan).
####################################################
# Local mode. Filters won't be propagated to scan, so scans will read all rows.
# Still give enough time for filters to show up (even if they won't)
SET RUNTIME_FILTER_WAIT_TIME_MS=$RUNTIME_FILTER_WAIT_TIME_MS;
SET RUNTIME_FILTER_MODE=LOCAL;
select STRAIGHT_JOIN count(*) from alltypes p join [SHUFFLE] alltypestiny b
on p.month = b.int_col and b.month = 1 and b.string_col = "1"
---- RESULTS
620
---- RUNTIME_PROFILE
row_regex: .*RowsRead: 2.43K .*
====
---- QUERY
# Shuffle join, global mode. Expect filters to be propagated.
SET RUNTIME_FILTER_WAIT_TIME_MS=$RUNTIME_FILTER_WAIT_TIME_MS;
SET RUNTIME_FILTER_MODE=GLOBAL;
select STRAIGHT_JOIN count(*) from alltypes p join [SHUFFLE] alltypestiny b
on p.month = b.int_col and b.month = 1 and b.string_col = "1"
---- RESULTS
620
---- RUNTIME_PROFILE
row_regex: .*Files rejected: 7 \(7\).*
---- RUNTIME_PROFILE: table_format=kudu
row_regex: .*RowsRead: 206 .*
====
---- QUERY
####################################################
# Test case 3: two-hop filter chain with BROADCAST
# joins.
# Without filtering in left-most scan, expect 7300 / 3 = 2433 rows.
# With filtering, expect 0 rows as all files are rejected by the partition
# column filter.
####################################################
# Local mode. Only the left-most scan will receive its filter, but since the scan of 'b'
# will not, the lack of predicates means there is no filter effect.
SET RUNTIME_FILTER_WAIT_TIME_MS=$RUNTIME_FILTER_WAIT_TIME_MS;
SET RUNTIME_FILTER_MODE=LOCAL;
select STRAIGHT_JOIN count(*) from alltypes a
join [BROADCAST] alltypes b
join [BROADCAST] alltypestiny c
where c.month = 13 and b.year = c.year and a.month = b.month
---- RESULTS
0
---- RUNTIME_PROFILE
row_regex: .*Files rejected: 0 .*
---- RUNTIME_PROFILE: table_format=kudu
row_regex: .*RowsRead: 2.43K .*
====
---- QUERY
# Global mode. Scan of 'b' will receive highly effective filter, and will propagate that
# to left-most scan.
SET RUNTIME_FILTER_WAIT_TIME_MS=$RUNTIME_FILTER_WAIT_TIME_MS;
SET RUNTIME_FILTER_MODE=GLOBAL;
select STRAIGHT_JOIN count(*) from alltypes a
join [BROADCAST] alltypes b
join [BROADCAST] alltypestiny c
where c.month = 13 and b.year = c.year and a.month = b.month
---- RESULTS
0
---- RUNTIME_PROFILE
row_regex: .*Files rejected: 8 .*
---- RUNTIME_PROFILE: table_format=kudu
row_regex: .*RowsRead: 0 .*
row_regex: .*ScanRangesComplete: 0 .*
====
---- QUERY
####################################################
# Test case 4: complex filter expressions. The join predicate matches nothing, but
# isn't simplified by the planner before execution.
# With local filtering, expect 0 rows, as all are rejected by partition pruning.
# For Kudu, the target is not a single column, so no filters will be created.
####################################################
SET RUNTIME_FILTER_WAIT_TIME_MS=$RUNTIME_FILTER_WAIT_TIME_MS;
SET RUNTIME_FILTER_MODE=LOCAL;
select STRAIGHT_JOIN count(*) from alltypes a
join [BROADCAST] alltypestiny b
on greatest(a.month, 1000) = greatest(b.month, 2000)
---- RESULTS
0
---- RUNTIME_PROFILE
row_regex: .*RowsRead: 0 .*
row_regex: .*Files rejected: 8 .*
---- RUNTIME_PROFILE: table_format=kudu
row_regex: .*RowsRead: 2.43K .*
====
---- QUERY
####################################################
# Test case 5: filters with local target don't get broadcast.
####################################################
# Local mode. Coordinator should report 0 filter updates received.
SET RUNTIME_FILTER_WAIT_TIME_MS=$RUNTIME_FILTER_WAIT_TIME_MS;
SET RUNTIME_FILTER_MODE=LOCAL;
select STRAIGHT_JOIN count(*) from alltypes a
join [BROADCAST] alltypestiny b
on a.month = b.month + 10000;
---- RESULTS
0
---- RUNTIME_PROFILE
row_regex: .*FiltersReceived: 0 .*
====
---- QUERY
# Global mode. Coordinator should report 0 filter updates received.
SET RUNTIME_FILTER_WAIT_TIME_MS=$RUNTIME_FILTER_WAIT_TIME_MS;
SET RUNTIME_FILTER_MODE=GLOBAL;
select STRAIGHT_JOIN count(*) from alltypes a
join [BROADCAST] alltypestiny b
on a.month = b.month + 10000;
---- RESULTS
0
---- RUNTIME_PROFILE
row_regex: .*FiltersReceived: 0 .*
row_regex: .*Files rejected: 8 .*
---- RUNTIME_PROFILE: table_format=kudu
row_regex: .*FiltersReceived: 0 .*
row_regex: .*RowsRead: 0 .*
====
---- QUERY
####################################################
# Test case 6: filters with non-local target get broadcast in GLOBAL mode only.
####################################################
# Local mode. Coordinator should report 0 filter updates received.
SET RUNTIME_FILTER_WAIT_TIME_MS=$RUNTIME_FILTER_WAIT_TIME_MS;
SET RUNTIME_FILTER_MODE=LOCAL;
select STRAIGHT_JOIN count(*) from alltypes a
join [SHUFFLE] alltypestiny b
on a.month = b.month + 10000;
---- RESULTS
0
---- RUNTIME_PROFILE
row_regex: .*FiltersReceived: 0 .*
====
---- QUERY
# Global mode. Coordinator should report 1 filter update per backend.
SET RUNTIME_FILTER_WAIT_TIME_MS=$RUNTIME_FILTER_WAIT_TIME_MS;
SET RUNTIME_FILTER_MODE=GLOBAL;
select STRAIGHT_JOIN count(*) from alltypes a
join [SHUFFLE] alltypestiny b
on a.month = b.month + 10000;
---- RESULTS
0
---- RUNTIME_PROFILE
row_regex: .*FiltersReceived: 3 .*
====
---- QUERY
####################################################
# Test case 7: filters with target exprs bound by > 1 slotref.
# Expect all but one partition to be filtered out by join expr.
# For Kudu, the target is not a single column, so no filters will be created.
####################################################
SET RUNTIME_FILTER_WAIT_TIME_MS=$RUNTIME_FILTER_WAIT_TIME_MS;
SET RUNTIME_FILTER_MODE=LOCAL;
select STRAIGHT_JOIN count(*) from alltypes a
join [BROADCAST] alltypestiny b
on a.month + a.year = b.year + 1;
---- RESULTS
2480
---- RUNTIME_PROFILE
row_regex: .*Files rejected: 7 .*
---- RUNTIME_PROFILE: table_format=kudu
row_regex: .*RowsRead: 2.43K .*
====
---- QUERY
####################################################
# Test case 8: filters do not pass through LOJ.
####################################################
SET RUNTIME_FILTER_WAIT_TIME_MS=$RUNTIME_FILTER_WAIT_TIME_MS;
SET RUNTIME_FILTER_MODE=GLOBAL;
select STRAIGHT_JOIN count(*) from alltypes a
LEFT OUTER join alltypestiny b
on a.month = b.year
---- RESULTS
7300
---- RUNTIME_PROFILE
row_regex: .*RowsReturned: 2.43K .*
====
---- QUERY
####################################################
# Test case 9: filters do pass through ROJ.
# All partitions will be filtered out by the join condition.
####################################################
SET RUNTIME_FILTER_WAIT_TIME_MS=$RUNTIME_FILTER_WAIT_TIME_MS;
SET RUNTIME_FILTER_MODE=GLOBAL;
select STRAIGHT_JOIN count(*) from alltypes a
RIGHT OUTER join alltypestiny b
on a.month = b.year
---- RESULTS
8
---- RUNTIME_PROFILE
row_regex: .*Files rejected: 8 .*
---- RUNTIME_PROFILE: table_format=kudu
row_regex: .*RowsRead: 0 .*
====
---- QUERY
####################################################
# Test case 10: filters do not pass through FOJ.
####################################################
SET RUNTIME_FILTER_WAIT_TIME_MS=$RUNTIME_FILTER_WAIT_TIME_MS;
SET RUNTIME_FILTER_MODE=GLOBAL;
select STRAIGHT_JOIN count(*) from alltypes a
FULL OUTER join alltypestiny b
on a.month = b.year
---- RESULTS
7308
---- RUNTIME_PROFILE
row_regex: .*RowsReturned: 2.43K .*
====
---- QUERY
####################################################
# Test case 11: join predicates with NULL values.
# Build-side selects one row from alltypes agg where day IS NULL, and joins with all rows
# in probe side with day IS NULL.
# Expect with filtering that 1K rows are returned, with an average of 333 per scan node
# per fragment instance, and three files rejected per scan.
# For Kudu, 'IS NOT DISTINCT' is not supported for runtime filters.
####################################################
SET RUNTIME_FILTER_WAIT_TIME_MS=$RUNTIME_FILTER_WAIT_TIME_MS;
SET RUNTIME_FILTER_MODE=GLOBAL;
select STRAIGHT_JOIN count(*) from alltypesagg a
join alltypesagg b
on a.day <=> b.day
where b.int_col IS NULL and b.id = 0 and b.day IS DISTINCT FROM 1
---- RESULTS
1000
---- RUNTIME_PROFILE
row_regex: .*Files rejected: 3 .*
---- RUNTIME_PROFILE: table_format=kudu
row_regex: .*RowsRead: 3.67K .*
====
---- QUERY
####################################################
# Test case 12: coordinator fragment produces filters
# In this esoteric query plan, the coordinator fragment has a hash
# join in its root, which produces filters for the scan of t1.
####################################################
set RUNTIME_FILTER_WAIT_TIME_MS=$RUNTIME_FILTER_WAIT_TIME_MS;
set RUNTIME_FILTER_MODE=GLOBAL;
with t1 as (select month x, bigint_col y from alltypes limit 7300),
t2 as (select distinct int_col x, bigint_col y from alltypestiny limit 2)
select count(*) from t1, t2 where t1.x = t2.x
---- RESULTS
620
---- RUNTIME_PROFILE
row_regex: .*Files rejected: 7 .*
---- RUNTIME_PROFILE: table_format=kudu
row_regex: .*RowsRead: 206 .*
====
---- QUERY
####################################################
# Test case 13: When NUM_NODES=1, all filters should be local.
# Regression test for IMPALA-3245.
####################################################
set NUM_NODES=1;
set RUNTIME_FILTER_MODE=GLOBAL;
select STRAIGHT_JOIN count(a.id) from alltypes a
join [SHUFFLE] alltypes b on a.id = b.id;
---- RESULTS
7300
====
---- QUERY
####################################################
# Test case 14: Filter with two targers (one local, one remote)
# In this three-way join the filter produced by the top-level
# join has both a local and a remote target.
####################################################
set RUNTIME_FILTER_WAIT_TIME_MS=$RUNTIME_FILTER_WAIT_TIME_MS;
set RUNTIME_FILTER_MODE=GLOBAL;
select straight_join count(*)
from alltypes a join [BROADCAST] alltypessmall c
on a.month = c.month join [BROADCAST] alltypesagg b
on a.month = b.id where b.int_col < 0;
---- RESULTS
0
====
---- QUERY
####################################################
# Test case 15: Runtime filter pushed to all union operands
####################################################
set RUNTIME_FILTER_WAIT_TIME_MS=$RUNTIME_FILTER_WAIT_TIME_MS;
set RUNTIME_FILTER_MODE=GLOBAL;
select straight_join count(*)
from (select month, year from alltypes
union all
select month, year from alltypes) a
join alltypessmall b on a.month = b.month
where b.int_col = 1;
---- RESULTS
14400
====
---- QUERY
####################################################
# Test case 16: Both HDFS and Kudu targets results in both bloom and min-max filters
# being generated
####################################################
set RUNTIME_FILTER_WAIT_TIME_MS=$RUNTIME_FILTER_WAIT_TIME_MS;
set RUNTIME_FILTER_MODE=GLOBAL;
select straight_join count(*)
from functional_parquet.alltypes a join [BROADCAST] functional_kudu.alltypes b
join [BROADCAST] functional_parquet.alltypes c
where a.int_col = b.int_col and a.int_col = c.smallint_col * 2 and c.id < 100
---- RESULTS
26645000
====