| ==== |
| ---- QUERY |
| select id, bool_col from functional_parquet.alltypessmall where int_col < 0 |
| ---- RESULTS |
| ---- RUNTIME_PROFILE |
| aggregation(SUM, NumRowGroups): 4 |
| aggregation(SUM, NumStatsFilteredRowGroups): 4 |
| ==== |
| ---- QUERY |
| set explain_level=2; |
| explain select id, bool_col from functional_parquet.alltypessmall where int_col < 0; |
| ---- RESULTS: VERIFY_IS_SUBSET |
| ' parquet statistics predicates: int_col < CAST(0 AS INT)' |
| ==== |
| ---- QUERY |
| select count(*) from functional_parquet.alltypessmall where tinyint_col < 0 |
| ---- RESULTS |
| 0 |
| ---- RUNTIME_PROFILE |
| aggregation(SUM, NumRowGroups): 4 |
| aggregation(SUM, NumStatsFilteredRowGroups): 4 |
| ==== |
| ---- QUERY |
| select count(*) from functional_parquet.alltypessmall where smallint_col < 0 |
| ---- RESULTS |
| 0 |
| ---- RUNTIME_PROFILE |
| aggregation(SUM, NumRowGroups): 4 |
| aggregation(SUM, NumStatsFilteredRowGroups): 4 |
| ==== |
| ---- QUERY |
| select count(*) from functional_parquet.alltypessmall where int_col < 0 |
| ---- RESULTS |
| 0 |
| ---- RUNTIME_PROFILE |
| aggregation(SUM, NumRowGroups): 4 |
| aggregation(SUM, NumStatsFilteredRowGroups): 4 |
| ==== |
| ---- QUERY |
| select count(*) from functional_parquet.alltypessmall where bigint_col < 0 |
| ---- RESULTS |
| 0 |
| ---- RUNTIME_PROFILE |
| aggregation(SUM, NumRowGroups): 4 |
| aggregation(SUM, NumStatsFilteredRowGroups): 4 |
| ==== |
| ---- QUERY |
| select count(*) from functional_parquet.alltypessmall where float_col < 0 |
| ---- RESULTS |
| 0 |
| ---- RUNTIME_PROFILE |
| aggregation(SUM, NumRowGroups): 4 |
| aggregation(SUM, NumStatsFilteredRowGroups): 4 |
| ==== |
| ---- QUERY |
| select count(*) from functional_parquet.alltypessmall where double_col < 0 |
| ---- RESULTS |
| 0 |
| ---- RUNTIME_PROFILE |
| aggregation(SUM, NumRowGroups): 4 |
| aggregation(SUM, NumStatsFilteredRowGroups): 4 |
| ==== |
| ---- QUERY |
| # Test with inverted predicate |
| select id, bool_col from functional_parquet.alltypessmall where -1 > int_col |
| ---- RESULTS |
| ---- RUNTIME_PROFILE |
| aggregation(SUM, NumRowGroups): 4 |
| aggregation(SUM, NumStatsFilteredRowGroups): 4 |
| ==== |
| ---- QUERY |
| select count(*) from functional_parquet.alltypessmall where tinyint_col > 9 |
| ---- RESULTS |
| 0 |
| ---- RUNTIME_PROFILE |
| aggregation(SUM, NumRowGroups): 4 |
| aggregation(SUM, NumStatsFilteredRowGroups): 4 |
| ==== |
| ---- QUERY |
| select count(*) from functional_parquet.alltypessmall where smallint_col > 9 |
| ---- RESULTS |
| 0 |
| ---- RUNTIME_PROFILE |
| aggregation(SUM, NumRowGroups): 4 |
| aggregation(SUM, NumStatsFilteredRowGroups): 4 |
| ==== |
| ---- QUERY |
| select id, bool_col from functional_parquet.alltypessmall where int_col > 9 |
| ---- RESULTS |
| ---- RUNTIME_PROFILE |
| aggregation(SUM, NumRowGroups): 4 |
| aggregation(SUM, NumStatsFilteredRowGroups): 4 |
| ==== |
| ---- QUERY |
| select count(*) from functional_parquet.alltypessmall where bigint_col > 90 |
| ---- RESULTS |
| 0 |
| ---- RUNTIME_PROFILE |
| aggregation(SUM, NumRowGroups): 4 |
| aggregation(SUM, NumStatsFilteredRowGroups): 4 |
| ==== |
| ---- QUERY |
| select count(*) from functional_parquet.alltypessmall where float_col > 9.9 |
| ---- RESULTS |
| 0 |
| ---- RUNTIME_PROFILE |
| aggregation(SUM, NumRowGroups): 4 |
| aggregation(SUM, NumStatsFilteredRowGroups): 4 |
| ==== |
| ---- QUERY |
| select count(*) from functional_parquet.alltypessmall where double_col > 99 |
| ---- RESULTS |
| 0 |
| ---- RUNTIME_PROFILE |
| aggregation(SUM, NumRowGroups): 4 |
| aggregation(SUM, NumStatsFilteredRowGroups): 4 |
| ==== |
| ---- QUERY |
| select count(*) from functional_parquet.alltypessmall where tinyint_col >= 10 |
| ---- RESULTS |
| 0 |
| ---- RUNTIME_PROFILE |
| aggregation(SUM, NumRowGroups): 4 |
| aggregation(SUM, NumStatsFilteredRowGroups): 4 |
| ==== |
| ---- QUERY |
| select count(*) from functional_parquet.alltypessmall where tinyint_col <= 0 |
| ---- RESULTS |
| 12 |
| ---- RUNTIME_PROFILE |
| aggregation(SUM, NumRowGroups): 4 |
| aggregation(SUM, NumStatsFilteredRowGroups): 0 |
| ==== |
| ==== |
| ---- QUERY |
| select count(*) from functional_parquet.alltypessmall where tinyint_col >= 9 |
| ---- RESULTS |
| 8 |
| ---- RUNTIME_PROFILE |
| aggregation(SUM, NumRowGroups): 4 |
| aggregation(SUM, NumStatsFilteredRowGroups): 0 |
| ==== |
| ---- QUERY |
| select count(*) from functional_parquet.alltypessmall where tinyint_col = -1 |
| ---- RESULTS |
| 0 |
| ---- RUNTIME_PROFILE |
| aggregation(SUM, NumRowGroups): 4 |
| aggregation(SUM, NumStatsFilteredRowGroups): 4 |
| ==== |
| ---- QUERY |
| select count(*) from functional_parquet.alltypessmall where tinyint_col = 10 |
| ---- RESULTS |
| 0 |
| ---- RUNTIME_PROFILE |
| aggregation(SUM, NumRowGroups): 4 |
| aggregation(SUM, NumStatsFilteredRowGroups): 4 |
| ==== |
| ---- QUERY |
| set explain_level=2; |
| explain select count(*) from functional_parquet.alltypessmall where tinyint_col = 10 |
| ---- RESULTS: VERIFY_IS_SUBSET |
| ' parquet statistics predicates: tinyint_col = CAST(10 AS TINYINT)' |
| ==== |
| ---- QUERY |
| select count(*) from functional_parquet.alltypessmall where id >= 30 and id <= 80 |
| ---- RESULTS |
| 51 |
| ---- RUNTIME_PROFILE |
| aggregation(SUM, NumRowGroups): 4 |
| aggregation(SUM, NumStatsFilteredRowGroups): 1 |
| ==== |
| ---- QUERY |
| # Mix with partitioning columns |
| select count(*) from functional_parquet.alltypessmall where int_col < 0 and year < 2012 |
| ---- RESULTS |
| 0 |
| ---- RUNTIME_PROFILE |
| aggregation(SUM, NumRowGroups): 4 |
| aggregation(SUM, NumStatsFilteredRowGroups): 4 |
| ==== |
| ---- QUERY |
| select id, bool_col from functional_parquet.alltypessmall where int_col < 3 - 3 |
| ---- RESULTS |
| ---- RUNTIME_PROFILE |
| aggregation(SUM, NumRowGroups): 4 |
| aggregation(SUM, NumStatsFilteredRowGroups): 4 |
| ==== |
| ---- QUERY |
| select id, bool_col from functional_parquet.alltypessmall where int_col < 3 - 3 |
| ---- RESULTS |
| ---- RUNTIME_PROFILE |
| aggregation(SUM, NumRowGroups): 4 |
| aggregation(SUM, NumStatsFilteredRowGroups): 4 |
| ==== |
| ---- QUERY |
| # Test that without expr rewrite and thus without constant folding, constant exprs still |
| # can be used to prune row groups. |
| set enable_expr_rewrites=0; |
| select id, bool_col from functional_parquet.alltypessmall where int_col < 3 - 3 |
| ---- RESULTS |
| ---- RUNTIME_PROFILE |
| aggregation(SUM, NumRowGroups): 4 |
| aggregation(SUM, NumStatsFilteredRowGroups): 4 |
| ==== |
| ---- QUERY |
| select id, bool_col from functional_parquet.alltypessmall where 5 + 5 < int_col |
| ---- RESULTS |
| ---- RUNTIME_PROFILE |
| aggregation(SUM, NumRowGroups): 4 |
| aggregation(SUM, NumStatsFilteredRowGroups): 4 |
| ==== |
| ---- QUERY |
| # Test that without expr rewrite and thus without constant folding, constant exprs still |
| # can be used to prune row groups. |
| set enable_expr_rewrites=0; |
| select id, bool_col from functional_parquet.alltypessmall where 5 + 5 < int_col |
| ---- RESULTS |
| ---- RUNTIME_PROFILE |
| aggregation(SUM, NumRowGroups): 4 |
| aggregation(SUM, NumStatsFilteredRowGroups): 4 |
| ==== |
| ---- QUERY |
| # Test name based column resolution |
| create table name_resolve stored as parquet as select * from functional_parquet.alltypessmall; |
| alter table name_resolve replace columns (int_col int, bool_col boolean, tinyint_col tinyint, smallint_col smallint, id int); |
| set parquet_fallback_schema_resolution=NAME; |
| # If this picks up the stats from int_col, then it will filter all row groups and return |
| # an incorrect result. |
| select count(*) from name_resolve where id > 10; |
| ---- RESULTS |
| 89 |
| ---- RUNTIME_PROFILE |
| aggregation(SUM, NumRowGroups): 1 |
| aggregation(SUM, NumStatsFilteredRowGroups): 0 |
| ==== |
| ---- QUERY |
| # Query that has an implicit cast to a larger integer type |
| select count(*) from functional_parquet.alltypessmall where tinyint_col > 1000000000000 |
| ---- RESULTS |
| 0 |
| ---- RUNTIME_PROFILE |
| aggregation(SUM, NumRowGroups): 4 |
| aggregation(SUM, NumStatsFilteredRowGroups): 4 |
| ==== |
| ---- QUERY |
| # Predicates with explicit casts are not supported when evaluating parquet::Statistics. |
| select count(*) from functional_parquet.alltypessmall where '0' > cast(tinyint_col as string) |
| ---- RESULTS |
| 0 |
| ---- RUNTIME_PROFILE |
| aggregation(SUM, NumRowGroups): 4 |
| aggregation(SUM, NumStatsFilteredRowGroups): 0 |
| ==== |
| ---- QUERY |
| # Explicit casts between numerical types can violate the transitivity of "min()", so they |
| # are not supported when evaluating parquet::Statistics. |
| select count(*) from functional_parquet.alltypes where cast(id as tinyint) < 10; |
| ---- RESULTS |
| 3878 |
| ---- RUNTIME_PROFILE |
| aggregation(SUM, NumRowGroups): 24 |
| aggregation(SUM, NumStatsFilteredRowGroups): 0 |
| ==== |
| ---- QUERY |
| select count(*) from functional_parquet.complextypestbl.int_array where pos < 5; |
| ---- RESULTS |
| 9 |
| ---- RUNTIME_PROFILE |
| aggregation(SUM, NumRowGroups): 2 |
| aggregation(SUM, NumStatsFilteredRowGroups): 0 |
| ==== |
| ---- QUERY |
| # Test the conversion of constant IN lists to min/max predicats |
| select count(*) from functional_parquet.alltypes where int_col in (-1,-2,-3,-4); |
| ---- RESULTS |
| 0 |
| ---- RUNTIME_PROFILE |
| aggregation(SUM, NumRowGroups): 24 |
| aggregation(SUM, NumStatsFilteredRowGroups): 24 |
| ==== |
| ---- QUERY |
| select count(*) from functional_parquet.alltypes where id IN (1,25,49); |
| ---- RESULTS |
| 3 |
| ---- RUNTIME_PROFILE |
| aggregation(SUM, NumRowGroups): 24 |
| aggregation(SUM, NumStatsFilteredRowGroups): 23 |
| ==== |
| ---- QUERY |
| select count(*) from functional_parquet.alltypessmall where string_col < "0" |
| ---- RESULTS |
| 0 |
| ---- RUNTIME_PROFILE |
| aggregation(SUM, NumRowGroups): 4 |
| aggregation(SUM, NumStatsFilteredRowGroups): 4 |
| ==== |
| ---- QUERY |
| select count(*) from functional_parquet.alltypessmall where string_col <= "/" |
| ---- RESULTS |
| 0 |
| ---- RUNTIME_PROFILE |
| aggregation(SUM, NumRowGroups): 4 |
| aggregation(SUM, NumStatsFilteredRowGroups): 4 |
| ==== |
| ---- QUERY |
| select count(*) from functional_parquet.alltypessmall where string_col < "1" |
| ---- RESULTS |
| 12 |
| ---- RUNTIME_PROFILE |
| aggregation(SUM, NumRowGroups): 4 |
| aggregation(SUM, NumStatsFilteredRowGroups): 0 |
| ==== |
| ---- QUERY |
| select count(*) from functional_parquet.alltypessmall where string_col >= "9" |
| ---- RESULTS |
| 8 |
| ---- RUNTIME_PROFILE |
| aggregation(SUM, NumRowGroups): 4 |
| aggregation(SUM, NumStatsFilteredRowGroups): 0 |
| ==== |
| ---- QUERY |
| select count(*) from functional_parquet.alltypessmall where string_col > ":" |
| ---- RESULTS |
| 0 |
| ---- RUNTIME_PROFILE |
| aggregation(SUM, NumRowGroups): 4 |
| aggregation(SUM, NumStatsFilteredRowGroups): 4 |
| ==== |
| ---- QUERY |
| select count(*) from functional_parquet.alltypessmall where timestamp_col < "2009-01-01 00:00:00" |
| ---- RESULTS |
| 0 |
| ---- RUNTIME_PROFILE |
| aggregation(SUM, NumRowGroups): 4 |
| aggregation(SUM, NumStatsFilteredRowGroups): 4 |
| ==== |
| ---- QUERY |
| select count(*) from functional_parquet.alltypessmall where timestamp_col <= "2009-01-01 00:00:00" |
| ---- RESULTS |
| 1 |
| ---- RUNTIME_PROFILE |
| aggregation(SUM, NumRowGroups): 4 |
| aggregation(SUM, NumStatsFilteredRowGroups): 3 |
| ==== |
| ---- QUERY |
| select count(*) from functional_parquet.alltypessmall where timestamp_col = "2009-01-01 00:00:00" |
| ---- RESULTS |
| 1 |
| ---- RUNTIME_PROFILE |
| aggregation(SUM, NumRowGroups): 4 |
| aggregation(SUM, NumStatsFilteredRowGroups): 3 |
| ==== |
| ---- QUERY |
| select count(*) from functional_parquet.alltypessmall where timestamp_col > "2009-04-03 00:24:00.96" |
| ---- RESULTS |
| 0 |
| ---- RUNTIME_PROFILE |
| aggregation(SUM, NumRowGroups): 4 |
| aggregation(SUM, NumStatsFilteredRowGroups): 4 |
| ==== |
| ---- QUERY |
| select count(*) from functional_parquet.alltypessmall where timestamp_col >= "2009-04-03 00:24:00.96" |
| ---- RESULTS |
| 1 |
| ---- RUNTIME_PROFILE |
| aggregation(SUM, NumRowGroups): 4 |
| aggregation(SUM, NumStatsFilteredRowGroups): 3 |
| ==== |
| ---- QUERY |
| select count(*) from functional_parquet.alltypessmall where timestamp_col = "2009-04-03 00:24:00.96" |
| ---- RESULTS |
| 1 |
| ---- RUNTIME_PROFILE |
| aggregation(SUM, NumRowGroups): 4 |
| aggregation(SUM, NumStatsFilteredRowGroups): 3 |
| ==== |
| ---- QUERY |
| select count(*) from functional_parquet.date_tbl |
| where date_part in ("2017-11-27", "1399-06-27") and date_col < '0001-06-21'; |
| ---- RESULTS |
| 0 |
| ---- RUNTIME_PROFILE |
| aggregation(SUM, NumRowGroups): 2 |
| aggregation(SUM, NumStatsFilteredRowGroups): 2 |
| ==== |
| ---- QUERY |
| select count(*) from functional_parquet.date_tbl |
| where date_part in ("2017-11-27", "1399-06-27") and date_col <= '0001-06-21'; |
| ---- RESULTS |
| 1 |
| ---- RUNTIME_PROFILE |
| aggregation(SUM, NumRowGroups): 2 |
| aggregation(SUM, NumStatsFilteredRowGroups): 1 |
| ==== |
| ---- QUERY |
| select count(*) from functional_parquet.date_tbl |
| where date_part in ("2017-11-27", "1399-06-27") and date_col = '0001-06-21'; |
| ---- RESULTS |
| 1 |
| ---- RUNTIME_PROFILE |
| aggregation(SUM, NumRowGroups): 2 |
| aggregation(SUM, NumStatsFilteredRowGroups): 1 |
| ==== |
| ---- QUERY |
| select count(*) from functional_parquet.date_tbl |
| where date_part in ("2017-11-27", "1399-06-27") and date_col > '2018-12-31'; |
| ---- RESULTS |
| 0 |
| ---- RUNTIME_PROFILE |
| aggregation(SUM, NumRowGroups): 2 |
| aggregation(SUM, NumStatsFilteredRowGroups): 2 |
| ==== |
| ---- QUERY |
| select count(*) from functional_parquet.date_tbl |
| where date_part in ("2017-11-27", "1399-06-27") and date_col >= '2018-12-31'; |
| ---- RESULTS |
| 1 |
| ---- RUNTIME_PROFILE |
| aggregation(SUM, NumRowGroups): 2 |
| aggregation(SUM, NumStatsFilteredRowGroups): 1 |
| ==== |
| ---- QUERY |
| select count(*) from functional_parquet.date_tbl |
| where date_part in ("2017-11-27", "1399-06-27") and date_col = '2018-12-31'; |
| ---- RESULTS |
| 1 |
| ---- RUNTIME_PROFILE |
| aggregation(SUM, NumRowGroups): 2 |
| aggregation(SUM, NumStatsFilteredRowGroups): 1 |
| ==== |
| ---- QUERY |
| select count(*) from functional_parquet.decimal_tbl where d1 < 1234 |
| ---- RESULTS |
| 0 |
| ---- RUNTIME_PROFILE |
| aggregation(SUM, NumRowGroups): 1 |
| aggregation(SUM, NumStatsFilteredRowGroups): 1 |
| ==== |
| ---- QUERY |
| select count(*) from functional_parquet.decimal_tbl where d3 < 1.23456789 |
| ---- RESULTS |
| 0 |
| ---- RUNTIME_PROFILE |
| aggregation(SUM, NumRowGroups): 1 |
| aggregation(SUM, NumStatsFilteredRowGroups): 1 |
| ==== |
| ---- QUERY |
| select count(*) from functional_parquet.decimal_tbl where d3 = 1.23456788 |
| ---- RESULTS |
| 0 |
| ---- RUNTIME_PROFILE |
| aggregation(SUM, NumRowGroups): 1 |
| aggregation(SUM, NumStatsFilteredRowGroups): 1 |
| ==== |
| ---- QUERY |
| select count(*) from functional_parquet.decimal_tbl where d3 = 1.23456789 |
| ---- RESULTS |
| 1 |
| ---- RUNTIME_PROFILE |
| aggregation(SUM, NumRowGroups): 1 |
| aggregation(SUM, NumStatsFilteredRowGroups): 0 |
| ==== |
| ---- QUERY |
| select count(*) from functional_parquet.decimal_tbl where d4 > 0.123456789 |
| ---- RESULTS |
| 0 |
| ---- RUNTIME_PROFILE |
| aggregation(SUM, NumRowGroups): 1 |
| aggregation(SUM, NumStatsFilteredRowGroups): 1 |
| ==== |
| ---- QUERY |
| select count(*) from functional_parquet.decimal_tbl where d4 >= 0.12345678 |
| ---- RESULTS |
| 5 |
| ---- RUNTIME_PROFILE |
| aggregation(SUM, NumRowGroups): 1 |
| aggregation(SUM, NumStatsFilteredRowGroups): 0 |
| ==== |
| ---- QUERY |
| select count(*) from functional_parquet.decimal_tbl where d4 >= 0.12345679 |
| ---- RESULTS |
| 0 |
| ---- RUNTIME_PROFILE |
| aggregation(SUM, NumRowGroups): 1 |
| aggregation(SUM, NumStatsFilteredRowGroups): 1 |
| ==== |
| ---- QUERY |
| # Test that stats are disabled for CHAR type columns. |
| create table chars (id int, c char(4)) stored as parquet; |
| insert into chars values (1, cast("abaa" as char(4))), (2, cast("abab" as char(4))); |
| select count(*) from chars; |
| ---- RESULTS |
| 2 |
| ==== |
| ---- QUERY |
| select count(*) from chars where c <= "aaaa" |
| ---- RESULTS |
| 0 |
| ---- RUNTIME_PROFILE |
| aggregation(SUM, NumRowGroups): 1 |
| aggregation(SUM, NumStatsFilteredRowGroups): 0 |
| ==== |
| ---- QUERY |
| # IMPALA-4988: Test that stats support can be disabled using the parquet_read_statistics |
| # query option. |
| set parquet_read_statistics=0; |
| select count(*) from functional_parquet.alltypes where id < 0; |
| ---- RESULTS |
| 0 |
| ---- RUNTIME_PROFILE |
| aggregation(SUM, NumRowGroups): 24 |
| aggregation(SUM, NumStatsFilteredRowGroups): 0 |
| ==== |
| ---- QUERY |
| # Check that all the row groups are skipped using null_count stat |
| create table table_for_null_count_test (i int, j int) stored as parquet; |
| insert into table_for_null_count_test values (1, NULL), (2, NULL), (3, NULL); |
| select count(*) from table_for_null_count_test where j < 3; |
| ---- RESULTS |
| 0 |
| ---- RUNTIME_PROFILE |
| aggregation(SUM, NumRowGroups): 1 |
| aggregation(SUM, NumStatsFilteredRowGroups): 1 |
| ==== |
| ---- QUERY |
| # Insert another row group where not all the 'j' values are NULL |
| insert into table_for_null_count_test values (4, 1), (5, NULL); |
| select i from table_for_null_count_test where j < 3; |
| ---- RESULTS |
| 4 |
| ---- RUNTIME_PROFILE |
| aggregation(SUM, NumRowGroups): 2 |
| aggregation(SUM, NumStatsFilteredRowGroups): 1 |
| ==== |
| ---- QUERY |
| # Turning off parquet stats and verifying that no row groups are skipped |
| set PARQUET_READ_STATISTICS=0; |
| create table table_for_null_count_test2 (i int, j int) stored as parquet; |
| insert into table_for_null_count_test2 values (1, NULL), (2, NULL), (3, NULL); |
| select count(*) from table_for_null_count_test2 where j < 3; |
| ---- RESULTS |
| 0 |
| ---- RUNTIME_PROFILE |
| aggregation(SUM, NumRowGroups): 1 |
| aggregation(SUM, NumStatsFilteredRowGroups): 0 |
| ==== |
| ---- QUERY |
| # IMPALA-6527: NaN values lead to incorrect filtering. |
| # When the first value is NaN in a column chunk, Impala chose it as min_value and |
| # max_value for statistics. Test if it is no longer the case. |
| create table test_nan(val double) stored as parquet; |
| insert into test_nan values (cast('NaN' as double)), (42); |
| select * from test_nan where val > 0 |
| ---- RESULTS |
| 42 |
| ==== |
| ---- QUERY |
| # IMPALA-6527: NaN values lead to incorrect filtering |
| # Test if '<' predicate produces expected results as well. |
| select * from test_nan where val < 100 |
| ---- RESULTS |
| 42 |
| ==== |
| ---- QUERY |
| # IMPALA-6527: NaN values lead to incorrect filtering |
| # Test if valid statistics are written. The column chunk should be filtered out by |
| # the min filter. |
| select * from test_nan where val < 10 |
| ---- RESULTS |
| ---- RUNTIME_PROFILE |
| aggregation(SUM, NumRowGroups): 1 |
| aggregation(SUM, NumStatsFilteredRowGroups): 1 |
| ==== |
| ---- QUERY |
| # IMPALA-6527: NaN values lead to incorrect filtering |
| # Test predicate that is true for NaN. |
| select * from test_nan where not val >= 0 |
| ---- RESULTS |
| NaN |
| ==== |
| ---- QUERY |
| # IMPALA-6527: NaN values lead to incorrect filtering |
| # Test predicate that is true for NaN. |
| select * from test_nan where val != 0 |
| ---- RESULTS |
| NaN |
| 42 |
| ==== |
| ---- QUERY |
| # Statistics filtering must not filter out row groups if predicate can be true for NaN. |
| create table test_nan_true_predicate(val double) stored as parquet; |
| insert into test_nan_true_predicate values (10), (20), (cast('NaN' as double)); |
| select * from test_nan_true_predicate where not val >= 0 |
| ---- RESULTS |
| NaN |
| ==== |
| ---- QUERY |
| # NaN is the last element, predicate is true for NaN and value. |
| select * from test_nan_true_predicate where not val >= 20 |
| ---- RESULTS |
| 10 |
| NaN |
| ==== |
| ---- QUERY |
| # NaN is the last element, predicate is true for NaN and value. |
| select * from test_nan_true_predicate where val != 10 |
| ---- RESULTS |
| 20 |
| NaN |
| ==== |
| ---- QUERY |
| # Test the case when NaN is inserted between two values. |
| # Test predicate true for NaN and false for the values. |
| create table test_nan_in_the_middle(val double) stored as parquet; |
| insert into test_nan_in_the_middle values (10), (cast('NaN' as double)), (20); |
| select * from test_nan_in_the_middle where not val >= 0 |
| ---- RESULTS |
| NaN |
| ==== |
| ---- QUERY |
| # NaN in the middle, predicate true for NaN and value. |
| select * from test_nan_in_the_middle where not val >= 20 |
| ---- RESULTS |
| 10 |
| NaN |
| ==== |
| ---- QUERY |
| # NaN in the middle, '!=' should return NaN and value. |
| select * from test_nan_in_the_middle where val != 10 |
| ---- RESULTS |
| NaN |
| 20 |
| ==== |
| ---- QUERY |
| # Test the case when there are only NaNs in the column chunk. |
| # Test predicate true for NaN |
| create table test_nan_only(val double) stored as parquet; |
| insert into test_nan_only values (cast('NaN' as double)), (cast('NaN' as double)), |
| (cast('NaN' as double)); |
| select * from test_nan_only where not val >= 0 |
| ---- RESULTS |
| NaN |
| NaN |
| NaN |
| ==== |
| ---- QUERY |
| # There are only NaN values, predicate is false for NaN |
| select * from test_nan_only where val >= 20 |
| ---- RESULTS |
| ==== |
| ---- QUERY |
| # Test the case when a number is following multiple NaNs. |
| # Test predicate true for NaN, false for the inserted number |
| create table test_multiple_nans(val double) stored as parquet; |
| insert into test_multiple_nans values (cast('NaN' as double)), (cast('NaN' as double)), |
| (cast('NaN' as double)), (20); |
| select * from test_multiple_nans where not val >= 0 |
| ---- RESULTS |
| NaN |
| NaN |
| NaN |
| ==== |
| ---- QUERY |
| # Multiple NaNs followed by a number, predicate is false for NaN and true for the number |
| select * from test_multiple_nans where val >= 20 |
| ---- RESULTS |
| 20 |
| ==== |
| ---- QUERY |
| # Multiple NaNs followed by a number, predicate is true for NaN and true for the number |
| select * from test_multiple_nans where not val > 20 |
| ---- RESULTS |
| NaN |
| NaN |
| NaN |
| 20 |
| ==== |
| ---- QUERY |
| # Multiple NaNs followed by a number, predicate is false for NaN and false for the number |
| select * from test_multiple_nans where val > 20 |
| ---- RESULTS |
| ==== |