blob: 66391a5a7c201d9b2f28172688f92bca25fa93a8 [file] [log] [blame]
====
---- QUERY
# Test cases that are only valid for PHJ joins are in this file.
####################################################
# Test case 1: PHJ nodes that spill should still produce filters.
# Run this for Parquet only to avoid variable memory
# consumption / spilling behaviour.
####################################################
SET BUFFER_POOL_LIMIT=40m;
SET RUNTIME_FILTER_MODE=GLOBAL;
SET RUNTIME_FILTER_WAIT_TIME_MS=30000;
SET RUNTIME_BLOOM_FILTER_SIZE=16M;
select STRAIGHT_JOIN count(a.l_comment)
from tpch_parquet.lineitem a join tpch_parquet.lineitem b
on a.l_comment = b.l_comment;
---- RESULTS
51495713
---- TYPES
BIGINT
---- RUNTIME_PROFILE
row_regex: .*SpilledPartitions: [1-9]\d* .*
row_regex: .*Rows processed: 16.38K.*
row_regex: .*Rows rejected: 0 .*
row_regex: .*1 of 1 Runtime Filter Published.*
====
---- QUERY
####################################################
# Test case 2: Filters are still effective inside subplans
# (in certain cases). The query has a HJ-scan pair inside a
# subplan (on the LHS).
####################################################
SET RUNTIME_FILTER_MODE=GLOBAL;
SET RUNTIME_FILTER_WAIT_TIME_MS=30000;
select straight_join count(1)
from alltypes a join complextypestbl b
# b.id + 10 confuses planner, so it doesn't think it
# can transitively push a.id < 20 to scan of a.
on a.id = b.id + 10 join b.int_array where b.id < 10
---- RESULTS
10
---- RUNTIME_PROFILE
row_regex: .*1 of 1 Runtime Filter Published.*
row_regex: .*Rows rejected: 2.43K \(2432\).*
====
---- QUERY
####################################################
# Test case 3: Filters will not be used if they exceed
# the configured memory limit on the coordinator.
# To test this, we need to construct a query where memory
# consumption on the coordinator exceeds MEM_LIMIT, but
# not on the backends (because otherwise they will disable
# the filters through another path). We set MEM_LIMIT to
# the minimum possible then set filter size to be roughly
# half that: since the coordinator must aggregate two of
# these filters (and indeed must create one as well), it
# will exceed the memory limit. This is checked for
# indirectly by confirming that the filter had no effect
# (when usually it would be selective).
####################################################
SET RUNTIME_FILTER_MODE=GLOBAL;
SET RUNTIME_FILTER_WAIT_TIME_MS=30000;
SET RUNTIME_FILTER_MIN_SIZE=128MB;
SET RUNTIME_FILTER_MAX_SIZE=500MB;
SET MEM_LIMIT=140MB;
select STRAIGHT_JOIN * from alltypes a join [SHUFFLE] alltypes b
on a.month = b.id and b.int_col = -3
---- RESULTS
---- RUNTIME_PROFILE
row_regex: .*Filter 0 \(128.00 MB\).*
row_regex: .*Files processed: 8.*
row_regex: .*Files rejected: 0.*
====
---- QUERY
# Confirm that with broadcast join, memory limit is not hit.
SET RUNTIME_FILTER_MODE=GLOBAL;
SET RUNTIME_FILTER_WAIT_TIME_MS=30000;
SET RUNTIME_FILTER_MIN_SIZE=128MB;
SET RUNTIME_FILTER_MAX_SIZE=500MB;
# Allocate enough memory for the join + filter + scan
SET MEM_LIMIT=170MB;
select STRAIGHT_JOIN * from alltypes a join [BROADCAST] alltypes b
on a.month = b.id and b.int_col = -3
---- RESULTS
---- RUNTIME_PROFILE
row_regex: .*Filter 0 \(128.00 MB\).*
row_regex: .*Files processed: 8.*
row_regex: .*Files rejected: 8.*
====