blob: d03b4c9c57ed61d14c2b9f580bad8e7248c5cfdc [file] [log] [blame]
====
---- QUERY
select id, bool_col from functional_parquet.alltypessmall where int_col < 0
---- RESULTS
---- RUNTIME_PROFILE
aggregation(SUM, NumRowGroups): 4
aggregation(SUM, NumStatsFilteredRowGroups): 4
====
---- QUERY
set explain_level=2;
explain select id, bool_col from functional_parquet.alltypessmall where int_col < 0;
---- RESULTS: VERIFY_IS_SUBSET
' parquet statistics predicates: int_col < 0'
====
---- QUERY
select count(*) from functional_parquet.alltypessmall where tinyint_col < 0
---- RESULTS
0
---- RUNTIME_PROFILE
aggregation(SUM, NumRowGroups): 4
aggregation(SUM, NumStatsFilteredRowGroups): 4
====
---- QUERY
select count(*) from functional_parquet.alltypessmall where smallint_col < 0
---- RESULTS
0
---- RUNTIME_PROFILE
aggregation(SUM, NumRowGroups): 4
aggregation(SUM, NumStatsFilteredRowGroups): 4
====
---- QUERY
select count(*) from functional_parquet.alltypessmall where int_col < 0
---- RESULTS
0
---- RUNTIME_PROFILE
aggregation(SUM, NumRowGroups): 4
aggregation(SUM, NumStatsFilteredRowGroups): 4
====
---- QUERY
select count(*) from functional_parquet.alltypessmall where bigint_col < 0
---- RESULTS
0
---- RUNTIME_PROFILE
aggregation(SUM, NumRowGroups): 4
aggregation(SUM, NumStatsFilteredRowGroups): 4
====
---- QUERY
select count(*) from functional_parquet.alltypessmall where float_col < 0
---- RESULTS
0
---- RUNTIME_PROFILE
aggregation(SUM, NumRowGroups): 4
aggregation(SUM, NumStatsFilteredRowGroups): 4
====
---- QUERY
select count(*) from functional_parquet.alltypessmall where double_col < 0
---- RESULTS
0
---- RUNTIME_PROFILE
aggregation(SUM, NumRowGroups): 4
aggregation(SUM, NumStatsFilteredRowGroups): 4
====
---- QUERY
# Test with inverted predicate
select id, bool_col from functional_parquet.alltypessmall where -1 > int_col
---- RESULTS
---- RUNTIME_PROFILE
aggregation(SUM, NumRowGroups): 4
aggregation(SUM, NumStatsFilteredRowGroups): 4
====
---- QUERY
select count(*) from functional_parquet.alltypessmall where tinyint_col > 9
---- RESULTS
0
---- RUNTIME_PROFILE
aggregation(SUM, NumRowGroups): 4
aggregation(SUM, NumStatsFilteredRowGroups): 4
====
---- QUERY
select count(*) from functional_parquet.alltypessmall where smallint_col > 9
---- RESULTS
0
---- RUNTIME_PROFILE
aggregation(SUM, NumRowGroups): 4
aggregation(SUM, NumStatsFilteredRowGroups): 4
====
---- QUERY
select id, bool_col from functional_parquet.alltypessmall where int_col > 9
---- RESULTS
---- RUNTIME_PROFILE
aggregation(SUM, NumRowGroups): 4
aggregation(SUM, NumStatsFilteredRowGroups): 4
====
---- QUERY
select count(*) from functional_parquet.alltypessmall where bigint_col > 90
---- RESULTS
0
---- RUNTIME_PROFILE
aggregation(SUM, NumRowGroups): 4
aggregation(SUM, NumStatsFilteredRowGroups): 4
====
---- QUERY
select count(*) from functional_parquet.alltypessmall where float_col > 9.9
---- RESULTS
0
---- RUNTIME_PROFILE
aggregation(SUM, NumRowGroups): 4
aggregation(SUM, NumStatsFilteredRowGroups): 4
====
---- QUERY
select count(*) from functional_parquet.alltypessmall where double_col > 99
---- RESULTS
0
---- RUNTIME_PROFILE
aggregation(SUM, NumRowGroups): 4
aggregation(SUM, NumStatsFilteredRowGroups): 4
====
---- QUERY
select count(*) from functional_parquet.alltypessmall where tinyint_col >= 10
---- RESULTS
0
---- RUNTIME_PROFILE
aggregation(SUM, NumRowGroups): 4
aggregation(SUM, NumStatsFilteredRowGroups): 4
====
---- QUERY
select count(*) from functional_parquet.alltypessmall where tinyint_col <= 0
---- RESULTS
12
---- RUNTIME_PROFILE
aggregation(SUM, NumRowGroups): 4
aggregation(SUM, NumStatsFilteredRowGroups): 0
====
====
---- QUERY
select count(*) from functional_parquet.alltypessmall where tinyint_col >= 9
---- RESULTS
8
---- RUNTIME_PROFILE
aggregation(SUM, NumRowGroups): 4
aggregation(SUM, NumStatsFilteredRowGroups): 0
====
---- QUERY
select count(*) from functional_parquet.alltypessmall where tinyint_col = -1
---- RESULTS
0
---- RUNTIME_PROFILE
aggregation(SUM, NumRowGroups): 4
aggregation(SUM, NumStatsFilteredRowGroups): 4
====
---- QUERY
select count(*) from functional_parquet.alltypessmall where tinyint_col = 10
---- RESULTS
0
---- RUNTIME_PROFILE
aggregation(SUM, NumRowGroups): 4
aggregation(SUM, NumStatsFilteredRowGroups): 4
====
---- QUERY
set explain_level=2;
explain select count(*) from functional_parquet.alltypessmall where tinyint_col = 10
---- RESULTS: VERIFY_IS_SUBSET
' parquet statistics predicates: tinyint_col = 10'
====
---- QUERY
select count(*) from functional_parquet.alltypessmall where id >= 30 and id <= 80
---- RESULTS
51
---- RUNTIME_PROFILE
aggregation(SUM, NumRowGroups): 4
aggregation(SUM, NumStatsFilteredRowGroups): 1
====
---- QUERY
# Mix with partitioning columns
select count(*) from functional_parquet.alltypessmall where int_col < 0 and year < 2012
---- RESULTS
0
---- RUNTIME_PROFILE
aggregation(SUM, NumRowGroups): 4
aggregation(SUM, NumStatsFilteredRowGroups): 4
====
---- QUERY
select id, bool_col from functional_parquet.alltypessmall where int_col < 3 - 3
---- RESULTS
---- RUNTIME_PROFILE
aggregation(SUM, NumRowGroups): 4
aggregation(SUM, NumStatsFilteredRowGroups): 4
====
---- QUERY
select id, bool_col from functional_parquet.alltypessmall where int_col < 3 - 3
---- RESULTS
---- RUNTIME_PROFILE
aggregation(SUM, NumRowGroups): 4
aggregation(SUM, NumStatsFilteredRowGroups): 4
====
---- QUERY
# Test that without expr rewrite and thus without constant folding, constant exprs still
# can be used to prune row groups.
set enable_expr_rewrites=0;
select id, bool_col from functional_parquet.alltypessmall where int_col < 3 - 3
---- RESULTS
---- RUNTIME_PROFILE
aggregation(SUM, NumRowGroups): 4
aggregation(SUM, NumStatsFilteredRowGroups): 4
====
---- QUERY
select id, bool_col from functional_parquet.alltypessmall where 5 + 5 < int_col
---- RESULTS
---- RUNTIME_PROFILE
aggregation(SUM, NumRowGroups): 4
aggregation(SUM, NumStatsFilteredRowGroups): 4
====
---- QUERY
# Test that without expr rewrite and thus without constant folding, constant exprs still
# can be used to prune row groups.
set enable_expr_rewrites=0;
select id, bool_col from functional_parquet.alltypessmall where 5 + 5 < int_col
---- RESULTS
---- RUNTIME_PROFILE
aggregation(SUM, NumRowGroups): 4
aggregation(SUM, NumStatsFilteredRowGroups): 4
====
---- QUERY
# Test name based column resolution
create table name_resolve stored as parquet as select * from functional_parquet.alltypessmall;
alter table name_resolve replace columns (int_col int, bool_col boolean, tinyint_col tinyint, smallint_col smallint, id int);
set parquet_fallback_schema_resolution=NAME;
# If this picks up the stats from int_col, then it will filter all row groups and return
# an incorrect result.
select count(*) from name_resolve where id > 10;
---- RESULTS
89
---- RUNTIME_PROFILE
aggregation(SUM, NumRowGroups): 1
aggregation(SUM, NumStatsFilteredRowGroups): 0
====
---- QUERY
# Query that has an implicit cast to a larger integer type
select count(*) from functional_parquet.alltypessmall where tinyint_col > 1000000000000
---- RESULTS
0
---- RUNTIME_PROFILE
aggregation(SUM, NumRowGroups): 4
aggregation(SUM, NumStatsFilteredRowGroups): 4
====
---- QUERY
# Predicates with explicit casts are not supported when evaluating parquet::Statistics.
select count(*) from functional_parquet.alltypessmall where '0' > cast(tinyint_col as string)
---- RESULTS
0
---- RUNTIME_PROFILE
aggregation(SUM, NumRowGroups): 4
aggregation(SUM, NumStatsFilteredRowGroups): 0
====
---- QUERY
# Explicit casts between numerical types can violate the transitivity of "min()", so they
# are not supported when evaluating parquet::Statistics.
select count(*) from functional_parquet.alltypes where cast(id as tinyint) < 10;
---- RESULTS
3878
---- RUNTIME_PROFILE
aggregation(SUM, NumRowGroups): 24
aggregation(SUM, NumStatsFilteredRowGroups): 0
====
---- QUERY
select count(*) from functional_parquet.complextypestbl.int_array where pos < 5;
---- RESULTS
9
---- RUNTIME_PROFILE
aggregation(SUM, NumRowGroups): 2
aggregation(SUM, NumStatsFilteredRowGroups): 0
====
---- QUERY
# Test the conversion of constant IN lists to min/max predicats
select count(*) from functional_parquet.alltypes where int_col in (-1,-2,-3,-4);
---- RESULTS
0
---- RUNTIME_PROFILE
aggregation(SUM, NumRowGroups): 24
aggregation(SUM, NumStatsFilteredRowGroups): 24
====
---- QUERY
select count(*) from functional_parquet.alltypes where id IN (1,25,49);
---- RESULTS
3
---- RUNTIME_PROFILE
aggregation(SUM, NumRowGroups): 24
aggregation(SUM, NumStatsFilteredRowGroups): 23
====
---- QUERY
select count(*) from functional_parquet.alltypessmall where string_col < "0"
---- RESULTS
0
---- RUNTIME_PROFILE
aggregation(SUM, NumRowGroups): 4
aggregation(SUM, NumStatsFilteredRowGroups): 4
====
---- QUERY
select count(*) from functional_parquet.alltypessmall where string_col <= "/"
---- RESULTS
0
---- RUNTIME_PROFILE
aggregation(SUM, NumRowGroups): 4
aggregation(SUM, NumStatsFilteredRowGroups): 4
====
---- QUERY
select count(*) from functional_parquet.alltypessmall where string_col < "1"
---- RESULTS
12
---- RUNTIME_PROFILE
aggregation(SUM, NumRowGroups): 4
aggregation(SUM, NumStatsFilteredRowGroups): 0
====
---- QUERY
select count(*) from functional_parquet.alltypessmall where string_col >= "9"
---- RESULTS
8
---- RUNTIME_PROFILE
aggregation(SUM, NumRowGroups): 4
aggregation(SUM, NumStatsFilteredRowGroups): 0
====
---- QUERY
select count(*) from functional_parquet.alltypessmall where string_col > ":"
---- RESULTS
0
---- RUNTIME_PROFILE
aggregation(SUM, NumRowGroups): 4
aggregation(SUM, NumStatsFilteredRowGroups): 4
====
---- QUERY
select count(*) from functional_parquet.alltypessmall where timestamp_col < "2009-01-01 00:00:00"
---- RESULTS
0
---- RUNTIME_PROFILE
aggregation(SUM, NumRowGroups): 4
aggregation(SUM, NumStatsFilteredRowGroups): 4
====
---- QUERY
select count(*) from functional_parquet.alltypessmall where timestamp_col <= "2009-01-01 00:00:00"
---- RESULTS
1
---- RUNTIME_PROFILE
aggregation(SUM, NumRowGroups): 4
aggregation(SUM, NumStatsFilteredRowGroups): 3
====
---- QUERY
select count(*) from functional_parquet.alltypessmall where timestamp_col = "2009-01-01 00:00:00"
---- RESULTS
1
---- RUNTIME_PROFILE
aggregation(SUM, NumRowGroups): 4
aggregation(SUM, NumStatsFilteredRowGroups): 3
====
---- QUERY
select count(*) from functional_parquet.alltypessmall where timestamp_col > "2009-04-03 00:24:00.96"
---- RESULTS
0
---- RUNTIME_PROFILE
aggregation(SUM, NumRowGroups): 4
aggregation(SUM, NumStatsFilteredRowGroups): 4
====
---- QUERY
select count(*) from functional_parquet.alltypessmall where timestamp_col >= "2009-04-03 00:24:00.96"
---- RESULTS
1
---- RUNTIME_PROFILE
aggregation(SUM, NumRowGroups): 4
aggregation(SUM, NumStatsFilteredRowGroups): 3
====
---- QUERY
select count(*) from functional_parquet.alltypessmall where timestamp_col = "2009-04-03 00:24:00.96"
---- RESULTS
1
---- RUNTIME_PROFILE
aggregation(SUM, NumRowGroups): 4
aggregation(SUM, NumStatsFilteredRowGroups): 3
====
---- QUERY
select count(*) from functional_parquet.decimal_tbl where d1 < 1234
---- RESULTS
0
---- RUNTIME_PROFILE
aggregation(SUM, NumRowGroups): 1
aggregation(SUM, NumStatsFilteredRowGroups): 1
====
---- QUERY
select count(*) from functional_parquet.decimal_tbl where d3 < 1.23456789
---- RESULTS
0
---- RUNTIME_PROFILE
aggregation(SUM, NumRowGroups): 1
aggregation(SUM, NumStatsFilteredRowGroups): 1
====
---- QUERY
select count(*) from functional_parquet.decimal_tbl where d3 = 1.23456788
---- RESULTS
0
---- RUNTIME_PROFILE
aggregation(SUM, NumRowGroups): 1
aggregation(SUM, NumStatsFilteredRowGroups): 1
====
---- QUERY
select count(*) from functional_parquet.decimal_tbl where d3 = 1.23456789
---- RESULTS
1
---- RUNTIME_PROFILE
aggregation(SUM, NumRowGroups): 1
aggregation(SUM, NumStatsFilteredRowGroups): 0
====
---- QUERY
select count(*) from functional_parquet.decimal_tbl where d4 > 0.123456789
---- RESULTS
0
---- RUNTIME_PROFILE
aggregation(SUM, NumRowGroups): 1
aggregation(SUM, NumStatsFilteredRowGroups): 1
====
---- QUERY
select count(*) from functional_parquet.decimal_tbl where d4 >= 0.12345678
---- RESULTS
5
---- RUNTIME_PROFILE
aggregation(SUM, NumRowGroups): 1
aggregation(SUM, NumStatsFilteredRowGroups): 0
====
---- QUERY
select count(*) from functional_parquet.decimal_tbl where d4 >= 0.12345679
---- RESULTS
0
---- RUNTIME_PROFILE
aggregation(SUM, NumRowGroups): 1
aggregation(SUM, NumStatsFilteredRowGroups): 1
====
---- QUERY
# Test that stats are disabled for CHAR type columns.
create table chars (id int, c char(4)) stored as parquet;
insert into chars values (1, cast("abaa" as char(4))), (2, cast("abab" as char(4)));
select count(*) from chars;
---- RESULTS
2
====
---- QUERY
select count(*) from chars where c <= "aaaa"
---- RESULTS
0
---- RUNTIME_PROFILE
aggregation(SUM, NumRowGroups): 1
aggregation(SUM, NumStatsFilteredRowGroups): 0
====
---- QUERY
# IMPALA-4988: Test that stats support can be disabled using the parquet_read_statistics
# query option.
set parquet_read_statistics=0;
select count(*) from functional_parquet.alltypes where id < 0;
---- RESULTS
0
---- RUNTIME_PROFILE
aggregation(SUM, NumRowGroups): 24
aggregation(SUM, NumStatsFilteredRowGroups): 0
====