testdata/workloads/functional-query/queries/QueryTest/parquet-filtering.test - impala - Git at Google

 # This tests parquet dictionary filtering.
 ====
 ---- QUERY
 # id: All values pass
 # Filters 0/8 row groups
 select count(*) from functional_parquet.alltypes where id < 10000;
 ---- RESULTS
 7300
 ---- RUNTIME_PROFILE
 aggregation(SUM, NumRowGroups): 24
 aggregation(SUM, NumDictFilteredRowGroups): 0
 ====
 ---- QUERY
 # id: Some values pass
 # Filters 7/8 row groups
 select count(*) from functional_parquet.alltypes where mod(id, 10000) < 20;
 ---- RESULTS
 20
 ---- RUNTIME_PROFILE
 aggregation(SUM, NumRowGroups): 24
 aggregation(SUM, NumDictFilteredRowGroups): 23
 ====
 ---- QUERY
 # id: No values pass
 # Filters 8/8 row groups
 select count(*) from functional_parquet.alltypes where mod(id,10000) = 7301;
 ---- RESULTS
 0
 ---- RUNTIME_PROFILE
 aggregation(SUM, NumRowGroups): 24
 aggregation(SUM, NumDictFilteredRowGroups): 24
 ====
 ---- QUERY
 # tinyint_col: All values pass
 # Filters 0/8 row groups
 select count(*) from functional_parquet.alltypes where tinyint_col < 10;
 ---- RESULTS
 7300
 ---- RUNTIME_PROFILE
 aggregation(SUM, NumRowGroups): 24
 aggregation(SUM, NumDictFilteredRowGroups): 0
 ====
 ---- QUERY
 # tinyint_col: No values pass
 # Filters 8/8 row groups
 select count(*) from functional_parquet.alltypes where mod(tinyint_col,50) > 10;
 ---- RESULTS
 0
 ---- RUNTIME_PROFILE
 aggregation(SUM, NumRowGroups): 24
 aggregation(SUM, NumDictFilteredRowGroups): 24
 ====
 ---- QUERY
 # smallint_col: All values pass
 # Filters 0/8 row groups
 select count(*) from functional_parquet.alltypes where smallint_col < 10
 ---- RESULTS
 7300
 ---- RUNTIME_PROFILE
 aggregation(SUM, NumRowGroups): 24
 aggregation(SUM, NumDictFilteredRowGroups): 0
 ====
 ---- QUERY
 # smallint_col: No values pass
 # Filters 8/8 row groups
 select count(*) from functional_parquet.alltypes where mod(smallint_col,50) > 10;
 ---- RESULTS
 0
 ---- RUNTIME_PROFILE
 aggregation(SUM, NumRowGroups): 24
 aggregation(SUM, NumDictFilteredRowGroups): 24
 ====
 ---- QUERY
 # int_col: All values pass
 # Filters 0/8 row groups
 select count(*) from functional_parquet.alltypes where int_col < 10
 ---- RESULTS
 7300
 ---- RUNTIME_PROFILE
 aggregation(SUM, NumRowGroups): 24
 aggregation(SUM, NumDictFilteredRowGroups): 0
 ====
 ---- QUERY
 # int_col: No values pass
 # Filters 8/8 row groups
 select count(*) from functional_parquet.alltypes where mod(int_col, 50) > 10;
 ---- RESULTS
 0
 ---- RUNTIME_PROFILE
 aggregation(SUM, NumRowGroups): 24
 aggregation(SUM, NumDictFilteredRowGroups): 24
 ====
 ---- QUERY
 # bigint_col: All values pass
 # Filters 0/8 row groups
 select count(*) from functional_parquet.alltypes where bigint_col < 100
 ---- RESULTS
 7300
 ---- RUNTIME_PROFILE
 aggregation(SUM, NumRowGroups): 24
 aggregation(SUM, NumDictFilteredRowGroups): 0
 ====
 ---- QUERY
 # bigint_col: No values pass
 # Filters 8/8 row groups
 select count(*) from functional_parquet.alltypes where mod(bigint_col, 500) > 100;
 ---- RESULTS
 0
 ---- RUNTIME_PROFILE
 aggregation(SUM, NumRowGroups): 24
 aggregation(SUM, NumDictFilteredRowGroups): 24
 ====
 ---- QUERY
 # float_col: All values pass
 # Filters 0/8 row groups
 select count(*) from functional_parquet.alltypes where float_col < 10
 ---- RESULTS
 7300
 ---- RUNTIME_PROFILE
 aggregation(SUM, NumRowGroups): 24
 aggregation(SUM, NumDictFilteredRowGroups): 0
 ====
 ---- QUERY
 # float_col: No values pass
 # Filters 8/8 row groups
 select count(*) from functional_parquet.alltypes where mod(float_col, 100) > 10;
 ---- RESULTS
 0
 ---- RUNTIME_PROFILE
 aggregation(SUM, NumRowGroups): 24
 aggregation(SUM, NumDictFilteredRowGroups): 24
 ====
 ---- QUERY
 # double_col: All values pass
 # Filters 0/8 row groups
 select count(*) from functional_parquet.alltypes where double_col < 100
 ---- RESULTS
 7300
 ---- RUNTIME_PROFILE
 aggregation(SUM, NumRowGroups): 24
 aggregation(SUM, NumDictFilteredRowGroups): 0
 ====
 ---- QUERY
 # double_col: No values pass
 # Filters 8/8 row groups
 select count(*) from functional_parquet.alltypes where mod(double_col, 100) > 100;
 ---- RESULTS
 0
 ---- RUNTIME_PROFILE
 aggregation(SUM, NumRowGroups): 24
 aggregation(SUM, NumDictFilteredRowGroups): 24
 ====
 ---- QUERY
 # date_string_col: All values pass
 # Filters 0/8 row groups
 select count(*) from functional_parquet.alltypes where date_string_col like '%/%/%';
 ---- RESULTS
 7300
 ---- RUNTIME_PROFILE
 aggregation(SUM, NumRowGroups): 24
 aggregation(SUM, NumDictFilteredRowGroups): 0
 ====
 ---- QUERY
 # date_string_col: Half of the values pass
 # Filters 4/8 row groups
 select count(*) from functional_parquet.alltypes where date_string_col like '%/10';
 ---- RESULTS
 3650
 ---- RUNTIME_PROFILE
 aggregation(SUM, NumRowGroups): 24
 aggregation(SUM, NumDictFilteredRowGroups): 12
 ====
 ---- QUERY
 # date_string_col: No values pass
 # Filters 8/8 row groups
 select count(*) from functional_parquet.alltypes where date_string_col = '01/01/11';
 ---- RESULTS
 0
 ---- RUNTIME_PROFILE
 aggregation(SUM, NumRowGroups): 24
 aggregation(SUM, NumDictFilteredRowGroups): 24
 ====
 ---- QUERY
 # string_col: All values pass
 # Filters 0/8 row groups
 select count(*) from functional_parquet.alltypes where length(string_col) = 1 ;
 ---- RESULTS
 7300
 ---- RUNTIME_PROFILE
 aggregation(SUM, NumRowGroups): 24
 aggregation(SUM, NumDictFilteredRowGroups): 0
 ====
 ---- QUERY
 # string_col: No values pass
 # Filters 8/8 row groups
 select count(*) from functional_parquet.alltypes where string_col = '10';
 ---- RESULTS
 0
 ---- RUNTIME_PROFILE
 aggregation(SUM, NumRowGroups): 24
 aggregation(SUM, NumDictFilteredRowGroups): 24
 ====
 ---- QUERY
 # timestamp_col: All values pass
 # Filters 0/8 row groups
 select count(*) from functional_parquet.alltypes where timestamp_col >= '2009-01-01 00:00:00';
 ---- RESULTS
 7300
 ---- RUNTIME_PROFILE
 aggregation(SUM, NumRowGroups): 24
 aggregation(SUM, NumDictFilteredRowGroups): 0
 ====
 ---- QUERY
 # timestamp_col: No values pass
 # Note: dictionary filtering currently does not work on timestamps
 # Filters 0/8 row groups
 select count(*) from functional_parquet.alltypes where timestamp_col = '2009-01-01 00:00:01';
 ---- RESULTS
 0
 ---- RUNTIME_PROFILE
 aggregation(SUM, NumRowGroups): 24
 aggregation(SUM, NumDictFilteredRowGroups): 0
 ====
 ---- QUERY
 # date_col: All values pass
 # Filters 0/4 row groups
 select count(*) from functional_parquet.date_tbl where date_col > '2017-12-31';
 ---- RESULTS
 4
 ---- RUNTIME_PROFILE
 aggregation(SUM, NumRowGroups): 4
 aggregation(SUM, NumDictFilteredRowGroups): 0
 ====
 ---- QUERY
 # date_col: No values pass
 # Note: dictionary filtering currently does not work on dates (see IMPALA-4994)
 # Filters 0/4 row groups
 select count(*) from functional_parquet.date_tbl where date_col = '2017-12-31';
 ---- RESULTS
 0
 ---- RUNTIME_PROFILE
 aggregation(SUM, NumRowGroups): 4
 aggregation(SUM, NumDictFilteredRowGroups): 0
 ====
 ---- QUERY
 # TPC-H lineitem.l_orderkey has high cardinality (1500000)
 # It always uses mixed encoding (PLAIN_DICTIONARY + PLAIN)
 # Verify that no dictionary filtering is used even for a predicate
 # that eliminates all rows.
 select count(*) from tpch_parquet.lineitem where l_orderkey = 50;
 ---- RESULTS
 0
 ---- RUNTIME_PROFILE
 aggregation(SUM, NumRowGroups): 3
 aggregation(SUM, NumDictFilteredRowGroups): 0
 ====
 ---- QUERY
 # Verify dictionary filtering on top level of a schema with nested
 # data.
 select count(*) from tpch_nested_parquet.customer where c_mktsegment = 'COMEDY';
 ---- RESULTS
 0
 ---- RUNTIME_PROFILE
 aggregation(SUM, NumRowGroups): 4
 aggregation(SUM, NumDictFilteredRowGroups): 4
 ====
 ---- QUERY
 # Verify that parquet_dictionary_filtering=false turns off dictionary filtering
 # This query filters 8/8 row groups when filtering is on. It should now filter
 # zero.
 set parquet_dictionary_filtering=false;
 select count(*) from functional_parquet.alltypes where mod(id,10000) = 7301;
 ---- RESULTS
 0
 ---- RUNTIME_PROFILE
 aggregation(SUM, NumRowGroups): 24
 aggregation(SUM, NumDictFilteredRowGroups): 0
 ====
 ---- QUERY
 # Nested dictionary filtering.
 #
 # Test coverage includes the following dimensions:
 # - nested collection type: map, array, struct
 # - number of filters and their nesting depth
 # - number of projections and their nesting depth
 # - required vs. non-required collections (outer vs. inner joins)
 # - filter matches: some, none
 # - count(*) optimization (exercises special code path)
 # - multiple row-groups per file
 #
 # Map key at depth 1. All required. No matches. Only one row-group is dictionary filtered
 # since only one (of two) row-groups is dictionary encoded for that column.
 select id from functional_parquet.complextypestbl f, f.int_map m where m.key = 'k5'
 ---- RESULTS
 ---- RUNTIME_PROFILE
 aggregation(SUM, NumRowGroups): 2
 aggregation(SUM, NumDictFilteredRowGroups): 1
 ====
 ---- QUERY
 select count(*) from functional_parquet.complextypestbl.int_map m where m.key = 'k5'
 ---- RESULTS
 0
 ---- RUNTIME_PROFILE
 aggregation(SUM, NumRowGroups): 2
 aggregation(SUM, NumDictFilteredRowGroups): 1
 ====
 ---- QUERY
 # Map key at depth 1. All required. Matches.
 select id from functional_parquet.complextypestbl f, f.int_map m where m.key = 'k1'
 ---- RESULTS
 8
 1
 2
 7
 ---- RUNTIME_PROFILE
 aggregation(SUM, NumRowGroups): 2
 aggregation(SUM, NumDictFilteredRowGroups): 0
 ====
 ---- QUERY
 # Map key at depth 1. None required. No matches.
 select id from functional_parquet.complextypestbl f left outer join f.int_map m
 where m.key = 'k5'
 ---- RESULTS
 ---- RUNTIME_PROFILE
 aggregation(SUM, NumRowGroups): 2
 aggregation(SUM, NumDictFilteredRowGroups): 0
 ====
 ---- QUERY
 # Map key at depth 1. None required. Matches.
 select id from functional_parquet.complextypestbl f left outer join f.int_map m
 where m.key = 'k1'
 ---- RESULTS
 8
 1
 2
 7
 ---- RUNTIME_PROFILE
 aggregation(SUM, NumRowGroups): 2
 aggregation(SUM, NumDictFilteredRowGroups): 0
 ====
 ---- QUERY
 # Map key at depth 1. All required. No matches. count(*).
 select count(*) from functional_parquet.complextypestbl f, f.int_map m where m.key = 'k5'
 ---- RESULTS
 0
 ---- RUNTIME_PROFILE
 aggregation(SUM, NumRowGroups): 2
 aggregation(SUM, NumDictFilteredRowGroups): 1
 ---- QUERY
 # Map key at depth 1. All required. Matches. count(*)
 select count(*) from functional_parquet.complextypestbl f, f.int_map m where m.key = 'k1'
 ---- RESULTS
 4
 ---- RUNTIME_PROFILE
 aggregation(SUM, NumRowGroups): 2
 aggregation(SUM, NumDictFilteredRowGroups): 0
 ====
 ---- QUERY
 # Map key at depth 1. None required. No matches. count(*)
 select count(*) from functional_parquet.complextypestbl f left outer join f.int_map m
 where m.key = 'k5'
 ---- RESULTS
 0
 ---- RUNTIME_PROFILE
 aggregation(SUM, NumRowGroups): 2
 aggregation(SUM, NumDictFilteredRowGroups): 0
 ====
 ---- QUERY
 # Map key at depth 1. None required. Matches. count(*)
 select count(*) from functional_parquet.complextypestbl f left outer join f.int_map m
 where m.key = 'k1'
 ---- RESULTS
 4
 ---- RUNTIME_PROFILE
 aggregation(SUM, NumRowGroups): 2
 aggregation(SUM, NumDictFilteredRowGroups): 0
 ====
 ---- QUERY
 # Array struct value at depth 2. All required. Matches.
 select r_name from tpch_nested_parquet.region r, r.r_nations n where n.n_name = 'FRANCE'
 ---- RESULTS
 'EUROPE'
 ---- RUNTIME_PROFILE
 aggregation(SUM, NumRowGroups): 1
 aggregation(SUM, NumDictFilteredRowGroups): 0
 ====
 ---- QUERY
 # Array struct value at depth 3. All required. No matches.
 select c_name from tpch_nested_parquet.customer c, c.c_orders o, o.o_lineitems l
 where l.l_returnflag = 'foo'
 ---- RESULTS
 ---- RUNTIME_PROFILE
 aggregation(SUM, NumRowGroups): 4
 aggregation(SUM, NumDictFilteredRowGroups): 4
 ====
 ---- QUERY
 # Array struct value at depth 3. All required. No matches.
 # Multiple nested values projected.
 select c_name, o.o_clerk from tpch_nested_parquet.customer c,
 c.c_orders o, o.o_lineitems l
 where l.l_returnflag = 'foo'
 ---- RESULTS
 ---- RUNTIME_PROFILE
 aggregation(SUM, NumRowGroups): 4
 aggregation(SUM, NumDictFilteredRowGroups): 4
 ====
 ---- QUERY
 # Array struct value at depth 3. Bottom not required. No matches.
 # Multiple nested values projected.
 select c_name, o.o_clerk from tpch_nested_parquet.customer c,
 c.c_orders o left outer join o.o_lineitems l
 where l.l_returnflag = 'foo'
 ---- RESULTS
 ---- RUNTIME_PROFILE
 aggregation(SUM, NumRowGroups): 4
 aggregation(SUM, NumDictFilteredRowGroups): 0
 ====
 ---- QUERY
 # Array struct value at depth 3. Top not required. No matches.
 # Multiple nested values projected.
 select c_name, o.o_clerk from tpch_nested_parquet.customer c left outer join c.c_orders o,
 o.o_lineitems l
 where l.l_returnflag = 'foo'
 ---- RESULTS
 ---- RUNTIME_PROFILE
 aggregation(SUM, NumRowGroups): 4
 aggregation(SUM, NumDictFilteredRowGroups): 0
 ====
 ---- QUERY
 # Array struct value at depth 3. All required. Multiple filters, multiple projections.
 # Matches.
 select c_name, o.o_clerk from tpch_nested_parquet.customer c,
 c.c_orders o, o.o_lineitems l
 where l.l_shipdate = '1994-08-19' and
 l.l_receiptdate = '1994-08-24' and l.l_shipmode = 'RAIL' and l.l_returnflag = 'R'
 ---- RESULTS
 'Customer#000013873','Clerk#000000554'
 'Customer#000049757','Clerk#000000156'
 'Customer#000037490','Clerk#000000026'
 'Customer#000002836','Clerk#000000577'
 'Customer#000004897','Clerk#000000112'
 'Customer#000107891','Clerk#000000576'
 ---- RUNTIME_PROFILE
 aggregation(SUM, NumRowGroups): 4
 aggregation(SUM, NumDictFilteredRowGroups): 0
 ---- QUERY
 # Array struct value at depth 3. All required. No matches, count(*).
 select count(*) from tpch_nested_parquet.customer c, c.c_orders o, o.o_lineitems l
 where l.l_returnflag = 'foo'
 ---- RESULTS
 0
 ---- RUNTIME_PROFILE
 aggregation(SUM, NumRowGroups): 4
 aggregation(SUM, NumDictFilteredRowGroups): 4
 ====
 ---- QUERY
 # Array struct values at depths 2 and 3. All required.
 # Matches, multiple nested values projected.
 select c_name, o.o_comment from tpch_nested_parquet.customer c,
 c.c_orders o, o.o_lineitems l
 where l.l_returnflag = 'foo' and o.o_clerk = 'foo'
 ---- RESULTS
 ---- RUNTIME_PROFILE
 aggregation(SUM, NumRowGroups): 4
 aggregation(SUM, NumDictFilteredRowGroups): 4
 ====
 ---- QUERY
 # Array struct value at depth 2. Not required. No match.
 # Illustrates a case that should not be pruned.
 select count(*) from tpch_nested_parquet.customer c left outer join
 (select * from c.c_orders o where o.o_orderstatus = 'foo') v
 ---- RESULTS
 150000
 ---- RUNTIME_PROFILE
 aggregation(SUM, NumRowGroups): 4
 aggregation(SUM, NumDictFilteredRowGroups): 0
 ====
 ---- QUERY
 # Tests dictionary filtering with files with multiple row-groups.
 # Expect all results to be filtered.
 select l.l_linenumber from functional_parquet.customer_multiblock c,
 c.c_orders o, o.o_lineitems l
 where l.l_linenumber + 1 < 0;
 ---- RESULTS
 ---- RUNTIME_PROFILE
 aggregation(SUM, NumRowGroups): 2
 aggregation(SUM, NumDictFilteredRowGroups): 2
 ---- QUERY
 # Tests dictionary filtering with files with multiple row-groups and count(*).
 # Expect all results to be filtered.
 select count(*) from functional_parquet.customer_multiblock c,
 c.c_orders o, o.o_lineitems l
 where l.l_linenumber + 1 < 0;
 ---- RESULTS
 ---- RUNTIME_PROFILE
 aggregation(SUM, NumRowGroups): 2
 aggregation(SUM, NumDictFilteredRowGroups): 2
 ---- QUERY
 # Tests dictionary filtering with files with multiple row-groups and count(*).
 # Expect same result as obtained with dictionary filtering disabled.
 select count(*) from functional_parquet.customer_multiblock c,
 c.c_orders o, o.o_lineitems l
 where l.l_linenumber > 0;
 ---- RESULTS
 7786
 ---- RUNTIME_PROFILE
 aggregation(SUM, NumRowGroups): 2
 aggregation(SUM, NumDictFilteredRowGroups): 0
	# This tests parquet dictionary filtering.
	====
	---- QUERY
	# id: All values pass
	# Filters 0/8 row groups
	select count(*) from functional_parquet.alltypes where id < 10000;
	---- RESULTS
	7300
	---- RUNTIME_PROFILE
	aggregation(SUM, NumRowGroups): 24
	aggregation(SUM, NumDictFilteredRowGroups): 0
	====
	---- QUERY
	# id: Some values pass
	# Filters 7/8 row groups
	select count(*) from functional_parquet.alltypes where mod(id, 10000) < 20;
	---- RESULTS
	20
	---- RUNTIME_PROFILE
	aggregation(SUM, NumRowGroups): 24
	aggregation(SUM, NumDictFilteredRowGroups): 23
	====
	---- QUERY
	# id: No values pass
	# Filters 8/8 row groups
	select count(*) from functional_parquet.alltypes where mod(id,10000) = 7301;
	---- RESULTS
	0
	---- RUNTIME_PROFILE
	aggregation(SUM, NumRowGroups): 24
	aggregation(SUM, NumDictFilteredRowGroups): 24
	====
	---- QUERY
	# tinyint_col: All values pass
	# Filters 0/8 row groups
	select count(*) from functional_parquet.alltypes where tinyint_col < 10;
	---- RESULTS
	7300
	---- RUNTIME_PROFILE
	aggregation(SUM, NumRowGroups): 24
	aggregation(SUM, NumDictFilteredRowGroups): 0
	====
	---- QUERY
	# tinyint_col: No values pass
	# Filters 8/8 row groups
	select count(*) from functional_parquet.alltypes where mod(tinyint_col,50) > 10;
	---- RESULTS
	0
	---- RUNTIME_PROFILE
	aggregation(SUM, NumRowGroups): 24
	aggregation(SUM, NumDictFilteredRowGroups): 24
	====
	---- QUERY
	# smallint_col: All values pass
	# Filters 0/8 row groups
	select count(*) from functional_parquet.alltypes where smallint_col < 10
	---- RESULTS
	7300
	---- RUNTIME_PROFILE
	aggregation(SUM, NumRowGroups): 24
	aggregation(SUM, NumDictFilteredRowGroups): 0
	====
	---- QUERY
	# smallint_col: No values pass
	# Filters 8/8 row groups
	select count(*) from functional_parquet.alltypes where mod(smallint_col,50) > 10;
	---- RESULTS
	0
	---- RUNTIME_PROFILE
	aggregation(SUM, NumRowGroups): 24
	aggregation(SUM, NumDictFilteredRowGroups): 24
	====
	---- QUERY
	# int_col: All values pass
	# Filters 0/8 row groups
	select count(*) from functional_parquet.alltypes where int_col < 10
	---- RESULTS
	7300
	---- RUNTIME_PROFILE
	aggregation(SUM, NumRowGroups): 24
	aggregation(SUM, NumDictFilteredRowGroups): 0
	====
	---- QUERY
	# int_col: No values pass
	# Filters 8/8 row groups
	select count(*) from functional_parquet.alltypes where mod(int_col, 50) > 10;
	---- RESULTS
	0
	---- RUNTIME_PROFILE
	aggregation(SUM, NumRowGroups): 24
	aggregation(SUM, NumDictFilteredRowGroups): 24
	====
	---- QUERY
	# bigint_col: All values pass
	# Filters 0/8 row groups
	select count(*) from functional_parquet.alltypes where bigint_col < 100
	---- RESULTS
	7300
	---- RUNTIME_PROFILE
	aggregation(SUM, NumRowGroups): 24
	aggregation(SUM, NumDictFilteredRowGroups): 0
	====
	---- QUERY
	# bigint_col: No values pass
	# Filters 8/8 row groups
	select count(*) from functional_parquet.alltypes where mod(bigint_col, 500) > 100;
	---- RESULTS
	0
	---- RUNTIME_PROFILE
	aggregation(SUM, NumRowGroups): 24
	aggregation(SUM, NumDictFilteredRowGroups): 24
	====
	---- QUERY
	# float_col: All values pass
	# Filters 0/8 row groups
	select count(*) from functional_parquet.alltypes where float_col < 10
	---- RESULTS
	7300
	---- RUNTIME_PROFILE
	aggregation(SUM, NumRowGroups): 24
	aggregation(SUM, NumDictFilteredRowGroups): 0
	====
	---- QUERY
	# float_col: No values pass
	# Filters 8/8 row groups
	select count(*) from functional_parquet.alltypes where mod(float_col, 100) > 10;
	---- RESULTS
	0
	---- RUNTIME_PROFILE
	aggregation(SUM, NumRowGroups): 24
	aggregation(SUM, NumDictFilteredRowGroups): 24
	====
	---- QUERY
	# double_col: All values pass
	# Filters 0/8 row groups
	select count(*) from functional_parquet.alltypes where double_col < 100
	---- RESULTS
	7300
	---- RUNTIME_PROFILE
	aggregation(SUM, NumRowGroups): 24
	aggregation(SUM, NumDictFilteredRowGroups): 0
	====
	---- QUERY
	# double_col: No values pass
	# Filters 8/8 row groups
	select count(*) from functional_parquet.alltypes where mod(double_col, 100) > 100;
	---- RESULTS
	0
	---- RUNTIME_PROFILE
	aggregation(SUM, NumRowGroups): 24
	aggregation(SUM, NumDictFilteredRowGroups): 24
	====
	---- QUERY
	# date_string_col: All values pass
	# Filters 0/8 row groups
	select count(*) from functional_parquet.alltypes where date_string_col like '%/%/%';
	---- RESULTS
	7300
	---- RUNTIME_PROFILE
	aggregation(SUM, NumRowGroups): 24
	aggregation(SUM, NumDictFilteredRowGroups): 0
	====
	---- QUERY
	# date_string_col: Half of the values pass
	# Filters 4/8 row groups
	select count(*) from functional_parquet.alltypes where date_string_col like '%/10';
	---- RESULTS
	3650
	---- RUNTIME_PROFILE
	aggregation(SUM, NumRowGroups): 24
	aggregation(SUM, NumDictFilteredRowGroups): 12
	====
	---- QUERY
	# date_string_col: No values pass
	# Filters 8/8 row groups
	select count(*) from functional_parquet.alltypes where date_string_col = '01/01/11';
	---- RESULTS
	0
	---- RUNTIME_PROFILE
	aggregation(SUM, NumRowGroups): 24
	aggregation(SUM, NumDictFilteredRowGroups): 24
	====
	---- QUERY
	# string_col: All values pass
	# Filters 0/8 row groups
	select count(*) from functional_parquet.alltypes where length(string_col) = 1 ;
	---- RESULTS
	7300
	---- RUNTIME_PROFILE
	aggregation(SUM, NumRowGroups): 24
	aggregation(SUM, NumDictFilteredRowGroups): 0
	====
	---- QUERY
	# string_col: No values pass
	# Filters 8/8 row groups
	select count(*) from functional_parquet.alltypes where string_col = '10';
	---- RESULTS
	0
	---- RUNTIME_PROFILE
	aggregation(SUM, NumRowGroups): 24
	aggregation(SUM, NumDictFilteredRowGroups): 24
	====
	---- QUERY
	# timestamp_col: All values pass
	# Filters 0/8 row groups
	select count(*) from functional_parquet.alltypes where timestamp_col >= '2009-01-01 00:00:00';
	---- RESULTS
	7300
	---- RUNTIME_PROFILE
	aggregation(SUM, NumRowGroups): 24
	aggregation(SUM, NumDictFilteredRowGroups): 0
	====
	---- QUERY
	# timestamp_col: No values pass
	# Note: dictionary filtering currently does not work on timestamps
	# Filters 0/8 row groups
	select count(*) from functional_parquet.alltypes where timestamp_col = '2009-01-01 00:00:01';
	---- RESULTS
	0
	---- RUNTIME_PROFILE
	aggregation(SUM, NumRowGroups): 24
	aggregation(SUM, NumDictFilteredRowGroups): 0
	====
	---- QUERY
	# date_col: All values pass
	# Filters 0/4 row groups
	select count(*) from functional_parquet.date_tbl where date_col > '2017-12-31';
	---- RESULTS
	4
	---- RUNTIME_PROFILE
	aggregation(SUM, NumRowGroups): 4
	aggregation(SUM, NumDictFilteredRowGroups): 0
	====
	---- QUERY
	# date_col: No values pass
	# Note: dictionary filtering currently does not work on dates (see IMPALA-4994)
	# Filters 0/4 row groups
	select count(*) from functional_parquet.date_tbl where date_col = '2017-12-31';
	---- RESULTS
	0
	---- RUNTIME_PROFILE
	aggregation(SUM, NumRowGroups): 4
	aggregation(SUM, NumDictFilteredRowGroups): 0
	====
	---- QUERY
	# TPC-H lineitem.l_orderkey has high cardinality (1500000)
	# It always uses mixed encoding (PLAIN_DICTIONARY + PLAIN)
	# Verify that no dictionary filtering is used even for a predicate
	# that eliminates all rows.
	select count(*) from tpch_parquet.lineitem where l_orderkey = 50;
	---- RESULTS
	0
	---- RUNTIME_PROFILE
	aggregation(SUM, NumRowGroups): 3
	aggregation(SUM, NumDictFilteredRowGroups): 0
	====
	---- QUERY
	# Verify dictionary filtering on top level of a schema with nested
	# data.
	select count(*) from tpch_nested_parquet.customer where c_mktsegment = 'COMEDY';
	---- RESULTS
	0
	---- RUNTIME_PROFILE
	aggregation(SUM, NumRowGroups): 4
	aggregation(SUM, NumDictFilteredRowGroups): 4
	====
	---- QUERY
	# Verify that parquet_dictionary_filtering=false turns off dictionary filtering
	# This query filters 8/8 row groups when filtering is on. It should now filter
	# zero.
	set parquet_dictionary_filtering=false;
	select count(*) from functional_parquet.alltypes where mod(id,10000) = 7301;
	---- RESULTS
	0
	---- RUNTIME_PROFILE
	aggregation(SUM, NumRowGroups): 24
	aggregation(SUM, NumDictFilteredRowGroups): 0
	====
	---- QUERY
	# Nested dictionary filtering.
	#
	# Test coverage includes the following dimensions:
	# - nested collection type: map, array, struct
	# - number of filters and their nesting depth
	# - number of projections and their nesting depth
	# - required vs. non-required collections (outer vs. inner joins)
	# - filter matches: some, none
	# - count(*) optimization (exercises special code path)
	# - multiple row-groups per file
	#
	# Map key at depth 1. All required. No matches. Only one row-group is dictionary filtered
	# since only one (of two) row-groups is dictionary encoded for that column.
	select id from functional_parquet.complextypestbl f, f.int_map m where m.key = 'k5'
	---- RESULTS
	---- RUNTIME_PROFILE
	aggregation(SUM, NumRowGroups): 2
	aggregation(SUM, NumDictFilteredRowGroups): 1
	====
	---- QUERY
	select count(*) from functional_parquet.complextypestbl.int_map m where m.key = 'k5'
	---- RESULTS
	0
	---- RUNTIME_PROFILE
	aggregation(SUM, NumRowGroups): 2
	aggregation(SUM, NumDictFilteredRowGroups): 1
	====
	---- QUERY
	# Map key at depth 1. All required. Matches.
	select id from functional_parquet.complextypestbl f, f.int_map m where m.key = 'k1'
	---- RESULTS
	8
	1
	2
	7
	---- RUNTIME_PROFILE
	aggregation(SUM, NumRowGroups): 2
	aggregation(SUM, NumDictFilteredRowGroups): 0
	====
	---- QUERY
	# Map key at depth 1. None required. No matches.
	select id from functional_parquet.complextypestbl f left outer join f.int_map m
	where m.key = 'k5'
	---- RESULTS
	---- RUNTIME_PROFILE
	aggregation(SUM, NumRowGroups): 2
	aggregation(SUM, NumDictFilteredRowGroups): 0
	====
	---- QUERY
	# Map key at depth 1. None required. Matches.
	select id from functional_parquet.complextypestbl f left outer join f.int_map m
	where m.key = 'k1'
	---- RESULTS
	8
	1
	2
	7
	---- RUNTIME_PROFILE
	aggregation(SUM, NumRowGroups): 2
	aggregation(SUM, NumDictFilteredRowGroups): 0
	====
	---- QUERY
	# Map key at depth 1. All required. No matches. count(*).
	select count(*) from functional_parquet.complextypestbl f, f.int_map m where m.key = 'k5'
	---- RESULTS
	0
	---- RUNTIME_PROFILE
	aggregation(SUM, NumRowGroups): 2
	aggregation(SUM, NumDictFilteredRowGroups): 1
	---- QUERY
	# Map key at depth 1. All required. Matches. count(*)
	select count(*) from functional_parquet.complextypestbl f, f.int_map m where m.key = 'k1'
	---- RESULTS
	4
	---- RUNTIME_PROFILE
	aggregation(SUM, NumRowGroups): 2
	aggregation(SUM, NumDictFilteredRowGroups): 0
	====
	---- QUERY
	# Map key at depth 1. None required. No matches. count(*)
	select count(*) from functional_parquet.complextypestbl f left outer join f.int_map m
	where m.key = 'k5'
	---- RESULTS
	0
	---- RUNTIME_PROFILE
	aggregation(SUM, NumRowGroups): 2
	aggregation(SUM, NumDictFilteredRowGroups): 0
	====
	---- QUERY
	# Map key at depth 1. None required. Matches. count(*)
	select count(*) from functional_parquet.complextypestbl f left outer join f.int_map m
	where m.key = 'k1'
	---- RESULTS
	4
	---- RUNTIME_PROFILE
	aggregation(SUM, NumRowGroups): 2
	aggregation(SUM, NumDictFilteredRowGroups): 0
	====
	---- QUERY
	# Array struct value at depth 2. All required. Matches.
	select r_name from tpch_nested_parquet.region r, r.r_nations n where n.n_name = 'FRANCE'
	---- RESULTS
	'EUROPE'
	---- RUNTIME_PROFILE
	aggregation(SUM, NumRowGroups): 1
	aggregation(SUM, NumDictFilteredRowGroups): 0
	====
	---- QUERY
	# Array struct value at depth 3. All required. No matches.
	select c_name from tpch_nested_parquet.customer c, c.c_orders o, o.o_lineitems l
	where l.l_returnflag = 'foo'
	---- RESULTS
	---- RUNTIME_PROFILE
	aggregation(SUM, NumRowGroups): 4
	aggregation(SUM, NumDictFilteredRowGroups): 4
	====
	---- QUERY
	# Array struct value at depth 3. All required. No matches.
	# Multiple nested values projected.
	select c_name, o.o_clerk from tpch_nested_parquet.customer c,
	c.c_orders o, o.o_lineitems l
	where l.l_returnflag = 'foo'
	---- RESULTS
	---- RUNTIME_PROFILE
	aggregation(SUM, NumRowGroups): 4
	aggregation(SUM, NumDictFilteredRowGroups): 4
	====
	---- QUERY
	# Array struct value at depth 3. Bottom not required. No matches.
	# Multiple nested values projected.
	select c_name, o.o_clerk from tpch_nested_parquet.customer c,
	c.c_orders o left outer join o.o_lineitems l
	where l.l_returnflag = 'foo'
	---- RESULTS
	---- RUNTIME_PROFILE
	aggregation(SUM, NumRowGroups): 4
	aggregation(SUM, NumDictFilteredRowGroups): 0
	====
	---- QUERY
	# Array struct value at depth 3. Top not required. No matches.
	# Multiple nested values projected.
	select c_name, o.o_clerk from tpch_nested_parquet.customer c left outer join c.c_orders o,
	o.o_lineitems l
	where l.l_returnflag = 'foo'
	---- RESULTS
	---- RUNTIME_PROFILE
	aggregation(SUM, NumRowGroups): 4
	aggregation(SUM, NumDictFilteredRowGroups): 0
	====
	---- QUERY
	# Array struct value at depth 3. All required. Multiple filters, multiple projections.
	# Matches.
	select c_name, o.o_clerk from tpch_nested_parquet.customer c,
	c.c_orders o, o.o_lineitems l
	where l.l_shipdate = '1994-08-19' and
	l.l_receiptdate = '1994-08-24' and l.l_shipmode = 'RAIL' and l.l_returnflag = 'R'
	---- RESULTS
	'Customer#000013873','Clerk#000000554'
	'Customer#000049757','Clerk#000000156'
	'Customer#000037490','Clerk#000000026'
	'Customer#000002836','Clerk#000000577'
	'Customer#000004897','Clerk#000000112'
	'Customer#000107891','Clerk#000000576'
	---- RUNTIME_PROFILE
	aggregation(SUM, NumRowGroups): 4
	aggregation(SUM, NumDictFilteredRowGroups): 0
	---- QUERY
	# Array struct value at depth 3. All required. No matches, count(*).
	select count(*) from tpch_nested_parquet.customer c, c.c_orders o, o.o_lineitems l
	where l.l_returnflag = 'foo'
	---- RESULTS
	0
	---- RUNTIME_PROFILE
	aggregation(SUM, NumRowGroups): 4
	aggregation(SUM, NumDictFilteredRowGroups): 4
	====
	---- QUERY
	# Array struct values at depths 2 and 3. All required.
	# Matches, multiple nested values projected.
	select c_name, o.o_comment from tpch_nested_parquet.customer c,
	c.c_orders o, o.o_lineitems l
	where l.l_returnflag = 'foo' and o.o_clerk = 'foo'
	---- RESULTS
	---- RUNTIME_PROFILE
	aggregation(SUM, NumRowGroups): 4
	aggregation(SUM, NumDictFilteredRowGroups): 4
	====
	---- QUERY
	# Array struct value at depth 2. Not required. No match.
	# Illustrates a case that should not be pruned.
	select count(*) from tpch_nested_parquet.customer c left outer join
	(select * from c.c_orders o where o.o_orderstatus = 'foo') v
	---- RESULTS
	150000
	---- RUNTIME_PROFILE
	aggregation(SUM, NumRowGroups): 4
	aggregation(SUM, NumDictFilteredRowGroups): 0
	====
	---- QUERY
	# Tests dictionary filtering with files with multiple row-groups.
	# Expect all results to be filtered.
	select l.l_linenumber from functional_parquet.customer_multiblock c,
	c.c_orders o, o.o_lineitems l
	where l.l_linenumber + 1 < 0;
	---- RESULTS
	---- RUNTIME_PROFILE
	aggregation(SUM, NumRowGroups): 2
	aggregation(SUM, NumDictFilteredRowGroups): 2
	---- QUERY
	# Tests dictionary filtering with files with multiple row-groups and count(*).
	# Expect all results to be filtered.
	select count(*) from functional_parquet.customer_multiblock c,
	c.c_orders o, o.o_lineitems l
	where l.l_linenumber + 1 < 0;
	---- RESULTS
	---- RUNTIME_PROFILE
	aggregation(SUM, NumRowGroups): 2
	aggregation(SUM, NumDictFilteredRowGroups): 2
	---- QUERY
	# Tests dictionary filtering with files with multiple row-groups and count(*).
	# Expect same result as obtained with dictionary filtering disabled.
	select count(*) from functional_parquet.customer_multiblock c,
	c.c_orders o, o.o_lineitems l
	where l.l_linenumber > 0;
	---- RESULTS
	7786
	---- RUNTIME_PROFILE
	aggregation(SUM, NumRowGroups): 2
	aggregation(SUM, NumDictFilteredRowGroups): 0