testdata/workloads/functional-query/queries/QueryTest/runtime_row_filters_phj.test - impala - Git at Google

 ====
 ---- QUERY
 # Test cases that are only valid for PHJ joins are in this file.

 ####################################################
 # Test case 1: PHJ nodes that spill should still produce filters.
 # Run this for Parquet only to avoid variable memory
 # consumption / spilling behaviour.
 ####################################################

 SET BUFFER_POOL_LIMIT=40m;
 SET RUNTIME_FILTER_MODE=GLOBAL;
 SET RUNTIME_FILTER_WAIT_TIME_MS=30000;
 SET RUNTIME_BLOOM_FILTER_SIZE=16M;
 select STRAIGHT_JOIN count(a.l_comment)
     from tpch_parquet.lineitem a join tpch_parquet.lineitem b
         on a.l_comment = b.l_comment;
 ---- RESULTS
 51495713
 ---- TYPES
 BIGINT
 ---- RUNTIME_PROFILE
 row_regex: .*SpilledPartitions: [1-9]\d* .*
 row_regex: .*Rows processed: 16.38K.*
 row_regex: .*Rows rejected: 0 .*
 row_regex: .*1 of 1 Runtime Filter Published.*
 ====


 ---- QUERY
 ####################################################
 # Test case 2: Filters are still effective inside subplans
 # (in certain cases). The query has a HJ-scan pair inside a
 # subplan (on the LHS).
 ####################################################
 SET RUNTIME_FILTER_MODE=GLOBAL;
 SET RUNTIME_FILTER_WAIT_TIME_MS=30000;
 select straight_join count(1)
     from alltypes a join complextypestbl b
         # b.id + 10 confuses planner, so it doesn't think it
         # can transitively push a.id < 20 to scan of a.
         on a.id = b.id + 10 join b.int_array where b.id < 10
 ---- RESULTS
 10
 ---- RUNTIME_PROFILE
 row_regex: .*1 of 1 Runtime Filter Published.*
 row_regex: .*Rows rejected: 2.43K \(2432\).*
 ====


 ---- QUERY
 ####################################################
 # Test case 3: Filters will not be used if they exceed
 # the configured memory limit on the coordinator.
 # To test this, we need to construct a query where memory
 # consumption on the coordinator exceeds MEM_LIMIT, but
 # not on the backends (because otherwise they will disable
 # the filters through another path). We set MEM_LIMIT to
 # the minimum possible then set filter size to be roughly
 # half that: since the coordinator must aggregate two of
 # these filters (and indeed must create one as well), it
 # will exceed the memory limit. This is checked for
 # indirectly by confirming that the filter had no effect
 # (when usually it would be selective).
 ####################################################
 SET RUNTIME_FILTER_MODE=GLOBAL;
 SET RUNTIME_FILTER_WAIT_TIME_MS=30000;
 SET RUNTIME_FILTER_MIN_SIZE=128MB;
 SET RUNTIME_FILTER_MAX_SIZE=500MB;
 SET MEM_LIMIT=140MB;
 select STRAIGHT_JOIN * from alltypes a join [SHUFFLE] alltypes b
     on a.month = b.id and b.int_col = -3
 ---- RESULTS
 ---- RUNTIME_PROFILE
 row_regex: .*Filter 0 \(128.00 MB\).*
 row_regex: .*Files processed: 8.*
 row_regex: .*Files rejected: 0.*
 ====
 ---- QUERY
 # Confirm that with broadcast join, memory limit is not hit.
 SET RUNTIME_FILTER_MODE=GLOBAL;
 SET RUNTIME_FILTER_WAIT_TIME_MS=30000;
 SET RUNTIME_FILTER_MIN_SIZE=128MB;
 SET RUNTIME_FILTER_MAX_SIZE=500MB;
 # Allocate enough memory for the join + filter + scan
 SET MEM_LIMIT=170MB;
 select STRAIGHT_JOIN * from alltypes a join [BROADCAST] alltypes b
     on a.month = b.id and b.int_col = -3
 ---- RESULTS
 ---- RUNTIME_PROFILE
 row_regex: .*Filter 0 \(128.00 MB\).*
 row_regex: .*Files processed: 8.*
 row_regex: .*Files rejected: 8.*
 ====
	====
	---- QUERY
	# Test cases that are only valid for PHJ joins are in this file.

	####################################################
	# Test case 1: PHJ nodes that spill should still produce filters.
	# Run this for Parquet only to avoid variable memory
	# consumption / spilling behaviour.
	####################################################

	SET BUFFER_POOL_LIMIT=40m;
	SET RUNTIME_FILTER_MODE=GLOBAL;
	SET RUNTIME_FILTER_WAIT_TIME_MS=30000;
	SET RUNTIME_BLOOM_FILTER_SIZE=16M;
	select STRAIGHT_JOIN count(a.l_comment)
	from tpch_parquet.lineitem a join tpch_parquet.lineitem b
	on a.l_comment = b.l_comment;
	---- RESULTS
	51495713
	---- TYPES
	BIGINT
	---- RUNTIME_PROFILE
	row_regex: .SpilledPartitions: [1-9]\d .*
	row_regex: .Rows processed: 16.38K.
	row_regex: .Rows rejected: 0 .
	row_regex: .1 of 1 Runtime Filter Published.
	====


	---- QUERY
	####################################################
	# Test case 2: Filters are still effective inside subplans
	# (in certain cases). The query has a HJ-scan pair inside a
	# subplan (on the LHS).
	####################################################
	SET RUNTIME_FILTER_MODE=GLOBAL;
	SET RUNTIME_FILTER_WAIT_TIME_MS=30000;
	select straight_join count(1)
	from alltypes a join complextypestbl b
	# b.id + 10 confuses planner, so it doesn't think it
	# can transitively push a.id < 20 to scan of a.
	on a.id = b.id + 10 join b.int_array where b.id < 10
	---- RESULTS
	10
	---- RUNTIME_PROFILE
	row_regex: .1 of 1 Runtime Filter Published.
	row_regex: .Rows rejected: 2.43K \(2432\).
	====


	---- QUERY
	####################################################
	# Test case 3: Filters will not be used if they exceed
	# the configured memory limit on the coordinator.
	# To test this, we need to construct a query where memory
	# consumption on the coordinator exceeds MEM_LIMIT, but
	# not on the backends (because otherwise they will disable
	# the filters through another path). We set MEM_LIMIT to
	# the minimum possible then set filter size to be roughly
	# half that: since the coordinator must aggregate two of
	# these filters (and indeed must create one as well), it
	# will exceed the memory limit. This is checked for
	# indirectly by confirming that the filter had no effect
	# (when usually it would be selective).
	####################################################
	SET RUNTIME_FILTER_MODE=GLOBAL;
	SET RUNTIME_FILTER_WAIT_TIME_MS=30000;
	SET RUNTIME_FILTER_MIN_SIZE=128MB;
	SET RUNTIME_FILTER_MAX_SIZE=500MB;
	SET MEM_LIMIT=140MB;
	select STRAIGHT_JOIN * from alltypes a join [SHUFFLE] alltypes b
	on a.month = b.id and b.int_col = -3
	---- RESULTS
	---- RUNTIME_PROFILE
	row_regex: .Filter 0 \(128.00 MB\).
	row_regex: .Files processed: 8.
	row_regex: .Files rejected: 0.
	====
	---- QUERY
	# Confirm that with broadcast join, memory limit is not hit.
	SET RUNTIME_FILTER_MODE=GLOBAL;
	SET RUNTIME_FILTER_WAIT_TIME_MS=30000;
	SET RUNTIME_FILTER_MIN_SIZE=128MB;
	SET RUNTIME_FILTER_MAX_SIZE=500MB;
	# Allocate enough memory for the join + filter + scan
	SET MEM_LIMIT=170MB;
	select STRAIGHT_JOIN * from alltypes a join [BROADCAST] alltypes b
	on a.month = b.id and b.int_col = -3
	---- RESULTS
	---- RUNTIME_PROFILE
	row_regex: .Filter 0 \(128.00 MB\).
	row_regex: .Files processed: 8.
	row_regex: .Files rejected: 8.
	====