| ==== |
| ---- QUERY |
| # Test cases that are only valid for PHJ joins are in this file. |
| |
| #################################################### |
| # Test case 1: PHJ nodes that spill should still produce filters. |
| # Run this for Parquet only to avoid variable memory |
| # consumption / spilling behaviour. |
| #################################################### |
| |
| SET BUFFER_POOL_LIMIT=40m; |
| SET RUNTIME_FILTER_MODE=GLOBAL; |
| SET RUNTIME_FILTER_WAIT_TIME_MS=30000; |
| SET RUNTIME_BLOOM_FILTER_SIZE=16M; |
| select STRAIGHT_JOIN count(a.l_comment) |
| from tpch_parquet.lineitem a join tpch_parquet.lineitem b |
| on a.l_comment = b.l_comment; |
| ---- RESULTS |
| 51495713 |
| ---- TYPES |
| BIGINT |
| ---- RUNTIME_PROFILE |
| row_regex: .*SpilledPartitions: [1-9]\d* .* |
| row_regex: .*Rows processed: 16.38K.* |
| row_regex: .*Rows rejected: 0 .* |
| row_regex: .*1 of 1 Runtime Filter Published.* |
| ==== |
| |
| |
| ---- QUERY |
| #################################################### |
| # Test case 2: Filters are still effective inside subplans |
| # (in certain cases). The query has a HJ-scan pair inside a |
| # subplan (on the LHS). |
| #################################################### |
| SET RUNTIME_FILTER_MODE=GLOBAL; |
| SET RUNTIME_FILTER_WAIT_TIME_MS=30000; |
| select straight_join count(1) |
| from alltypes a join complextypestbl b |
| # b.id + 10 confuses planner, so it doesn't think it |
| # can transitively push a.id < 20 to scan of a. |
| on a.id = b.id + 10 join b.int_array where b.id < 10 |
| ---- RESULTS |
| 10 |
| ---- RUNTIME_PROFILE |
| row_regex: .*1 of 1 Runtime Filter Published.* |
| row_regex: .*Rows rejected: 2.43K \(2432\).* |
| ==== |
| |
| |
| ---- QUERY |
| #################################################### |
| # Test case 3: Filters will not be used if they exceed |
| # the configured memory limit on the coordinator. |
| # To test this, we need to construct a query where memory |
| # consumption on the coordinator exceeds MEM_LIMIT, but |
| # not on the backends (because otherwise they will disable |
| # the filters through another path). We set MEM_LIMIT to |
| # the minimum possible then set filter size to be roughly |
| # half that: since the coordinator must aggregate two of |
| # these filters (and indeed must create one as well), it |
| # will exceed the memory limit. This is checked for |
| # indirectly by confirming that the filter had no effect |
| # (when usually it would be selective). |
| #################################################### |
| SET RUNTIME_FILTER_MODE=GLOBAL; |
| SET RUNTIME_FILTER_WAIT_TIME_MS=30000; |
| SET RUNTIME_FILTER_MIN_SIZE=128MB; |
| SET RUNTIME_FILTER_MAX_SIZE=500MB; |
| SET MEM_LIMIT=140MB; |
| select STRAIGHT_JOIN * from alltypes a join [SHUFFLE] alltypes b |
| on a.month = b.id and b.int_col = -3 |
| ---- RESULTS |
| ---- RUNTIME_PROFILE |
| row_regex: .*Filter 0 \(128.00 MB\).* |
| row_regex: .*Files processed: 8.* |
| row_regex: .*Files rejected: 0.* |
| ==== |
| ---- QUERY |
| # Confirm that with broadcast join, memory limit is not hit. |
| SET RUNTIME_FILTER_MODE=GLOBAL; |
| SET RUNTIME_FILTER_WAIT_TIME_MS=30000; |
| SET RUNTIME_FILTER_MIN_SIZE=128MB; |
| SET RUNTIME_FILTER_MAX_SIZE=500MB; |
| # Allocate enough memory for the join + filter + scan |
| SET MEM_LIMIT=170MB; |
| select STRAIGHT_JOIN * from alltypes a join [BROADCAST] alltypes b |
| on a.month = b.id and b.int_col = -3 |
| ---- RESULTS |
| ---- RUNTIME_PROFILE |
| row_regex: .*Filter 0 \(128.00 MB\).* |
| row_regex: .*Files processed: 8.* |
| row_regex: .*Files rejected: 8.* |
| ==== |