blob: 47c4eaa1e66a6835775301f515749271e8742d24 [file] [log] [blame]
# These tests check that the page selection and value-skipping logic works well when using
# the page index of the Parquet file. 'decimals_1_10' contains tiny, misaligned pages and
# some NULL values. Column 'd_10' has one value per page, while column 'd_1' has five
# values per page. Thus, with putting predicates on column 'd_10' we can craft different
# test cases for value skipping in 'd_1'.
====
---- QUERY
# 'd_10 = 1' selects the first row from each page. Therefore in the pages of 'd_1' we
# read the first value, then skip all the rest.
select * from decimals_1_10 where d_10 = 1
---- RESULTS
1,1
NULL,1
1,1
1,1
1,1
1,1
NULL,1
1,1
1,1
NULL,1
1,1
---- TYPES
DECIMAL, DECIMAL
---- RUNTIME_PROFILE
aggregation(SUM, NumStatsFilteredPages): 50
====
---- QUERY
# Selecting the second rows of the pages of 'd_1', skipping values before and after.
select * from decimals_1_10 where d_10 = 2
---- RESULTS
2,2
2,2
NULL,2
2,2
2,2
2,2
NULL,2
NULL,2
2,2
2,2
2,2
---- TYPES
DECIMAL, DECIMAL
---- RUNTIME_PROFILE
aggregation(SUM, NumStatsFilteredPages): 50
====
---- QUERY
# Selecting the third rows of the pages of 'd_1', skipping values before and after.
select * from decimals_1_10 where d_10 = 3
---- RESULTS
3,3
3,3
3,3
NULL,3
3,3
3,3
3,3
3,3
3,3
NULL,3
3,3
---- TYPES
DECIMAL, DECIMAL
---- RUNTIME_PROFILE
aggregation(SUM, NumStatsFilteredPages): 50
====
---- QUERY
# Selecting the fourth rows of the pages of 'd_1', skipping values before and after.
select * from decimals_1_10 where d_10 = 4
---- RESULTS
4,4
4,4
4,4
4,4
NULL,4
4,4
4,4
NULL,4
NULL,4
NULL,4
4,4
---- TYPES
DECIMAL, DECIMAL
---- RUNTIME_PROFILE
aggregation(SUM, NumStatsFilteredPages): 50
====
---- QUERY
# 'd_10 = 5' selects the last row from each page. Therefore in the pages of 'd_1' we
# skip the first four values, then read the last.
select * from decimals_1_10 where d_10 = 5
---- RESULTS
5,5
5,5
5,5
5,5
5,5
NULL,5
5,5
5,5
NULL,5
5,5
---- TYPES
DECIMAL, DECIMAL
---- RUNTIME_PROFILE
aggregation(SUM, NumStatsFilteredPages): 52
====
---- QUERY
# Selecting the first couple of rows from the pages of 'd_1'. Skips last rows.
select * from decimals_1_10 where d_10 < 3
---- RESULTS
1,1
2,2
NULL,1
2,2
1,1
NULL,2
1,1
2,2
1,1
2,2
1,1
2,2
NULL,1
NULL,2
1,1
NULL,2
1,1
2,2
NULL,1
2,2
1,1
2,2
---- TYPES
DECIMAL, DECIMAL
---- RUNTIME_PROFILE
aggregation(SUM, NumStatsFilteredPages): 39
====
---- QUERY
# Selecting the last couple of rows from the pages of 'd_1'. Skips first rows.
select * from decimals_1_10 where d_10 > 2
---- RESULTS
3,3
4,4
5,5
3,3
4,4
5,5
3,3
4,4
5,5
NULL,3
4,4
5,5
3,3
NULL,4
5,5
3,3
4,4
NULL,5
3,3
4,4
5,5
3,3
NULL,4
5,5
3,3
NULL,4
NULL,5
NULL,3
NULL,4
5,5
7,7
8,8
9,9
8,8
7,7
3,3
4,4
---- TYPES
DECIMAL, DECIMAL
---- RUNTIME_PROFILE
aggregation(SUM, NumStatsFilteredPages): 23
====
---- QUERY
# Skipping middle row in a page.
select * from decimals_1_10 where d_10 > 5 and d_10 < 9
---- RESULTS
7,7
8,8
8,8
7,7
---- TYPES
DECIMAL, DECIMAL
---- RUNTIME_PROFILE
aggregation(SUM, NumStatsFilteredPages): 67
====
---- QUERY
# Only reading middle rows in a page.
select * from decimals_1_10 where d_10 > 7
---- RESULTS
8,8
9,9
8,8
---- TYPES
DECIMAL, DECIMAL
---- RUNTIME_PROFILE
aggregation(SUM, NumStatsFilteredPages): 68
====
---- QUERY
# Row group level minimum is 1, maximum is 9. But there is a gap between the pages,
# therefore with page-level statistics we can filter out the whole row group.
select * from decimals_1_10 where d_10 = 6
---- RESULTS
---- TYPES
DECIMAL, DECIMAL
---- RUNTIME_PROFILE
aggregation(SUM, NumStatsFilteredRowGroups): 1
====