blob: b67603a3ff1e7da01f64ebdf0cf3d3747a96226e [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#include "exec/parquet/parquet-common.h"
#include "testutil/gtest-util.h"
#include "common/names.h"
namespace impala {
using RangeVec = vector<RowRange>;
void ValidateRanges(RangeVec skip_ranges, int num_rows, const RangeVec& expected,
bool should_succeed = true) {
RangeVec result;
bool success = ComputeCandidateRanges(num_rows, &skip_ranges, &result);
EXPECT_EQ(should_succeed, success);
if (success) EXPECT_EQ(expected, result);
}
void ValidateRangesError(RangeVec skip_ranges, int num_rows, const RangeVec& expected) {
ValidateRanges(skip_ranges, num_rows, expected, false);
}
/// This test exercises the logic of ComputeCandidateRanges() with various
/// inputs. ComputeCandidateRanges() determines the row ranges we need to scan
/// in a Parquet file based on the total row count and a set of ranges that we
/// need to skip.
TEST(ParquetCommon, ComputeCandidateRanges) {
ValidateRanges({}, -1, {});
ValidateRanges({{0, 5}}, 10, {{6, 9}});
ValidateRanges({{6, 9}}, 10, {{0, 5}});
ValidateRanges({{0, 9}}, 10, {});
ValidateRanges({{2, 4}, {2, 4}}, 10, {{0, 1}, {5, 9}});
ValidateRanges({{2, 4}, {6, 6}}, 10, {{0, 1}, {5, 5}, {7, 9}});
ValidateRanges({{2, 4}, {3, 7}}, 10, {{0, 1}, {8, 9}});
ValidateRanges({{2, 6}, {1, 8}}, 10, {{0, 0}, {9, 9}});
ValidateRanges({{1, 2}, {2, 5}, {7, 8}}, 10, {{0, 0}, {6, 6}, {9, 9}});
ValidateRanges({{0, 2}, {6, 8}, {3, 5}}, 10, {{9, 9}});
ValidateRanges({{1, 2}, {1, 4}, {1, 8}}, 10, {{0, 0}, {9, 9}});
ValidateRanges({{7, 8}, {1, 8}, {3, 8}}, 10, {{0, 0}, {9, 9}});
// Error cases:
// Range starts at negative number
ValidateRangesError({{-1, 1}}, 10, {});
// Range is not a subrange of [0..num_rows)
ValidateRangesError({{2, 12}}, 10, {});
// First > last
ValidateRangesError({{6, 3}}, 10, {});
}
void ValidatePages(const vector<int64_t>& first_row_indexes, const RangeVec& ranges,
int64_t num_rows, const vector<int>& expected_page_indexes,
bool should_succeed = true) {
vector<parquet::PageLocation> page_locations;
for (int64_t first_row_index : first_row_indexes) {
parquet::PageLocation page_loc;
page_loc.first_row_index = first_row_index;
page_locations.push_back(page_loc);
}
vector<int> candidate_pages;
bool success = ComputeCandidatePages(page_locations, ranges, num_rows,
&candidate_pages);
EXPECT_EQ(should_succeed, success);
if (success) EXPECT_EQ(expected_page_indexes, candidate_pages);
}
void ValidatePagesError(const vector<int64_t>& first_row_indexes, const RangeVec& ranges,
int64_t num_rows, const vector<int>& expected_page_indexes) {
ValidatePages(first_row_indexes, ranges, num_rows, expected_page_indexes, false);
}
/// This test exercises the logic of ComputeCandidatePages(). It creates fake vectors
/// of page location objects and candidate ranges then checks whether the right pages
/// were selected.
TEST(ParquetCommon, ComputeCandidatePages) {
ValidatePages({0}, {}, 10, {});
ValidatePages({0}, {{2, 3}}, 10, {0});
ValidatePages({0}, {{0, 9}}, 10, {0});
ValidatePages({0, 10, 20, 50, 70}, {{0, 9}}, 100, {0});
ValidatePages({0, 10, 20, 50, 70}, {{5, 15}}, 100, {0, 1});
ValidatePages({0, 10, 20, 50, 70}, {{5, 15}}, 100, {0, 1});
ValidatePages({0, 10, 20, 50, 70}, {{5, 15}, {21, 50}}, 100, {0, 1, 2, 3});
ValidatePages({0, 10, 20, 50, 70}, {{90, 92}}, 100, {4});
ValidatePages({0, 10, 20, 50, 70},
{{0, 9}, {10, 19}, {20, 49}, {50, 69}, {70, 99}}, 100, {0, 1, 2, 3, 4});
ValidatePages({0, 10, 20, 50, 70}, {{5, 75}}, 100, {0, 1, 2, 3, 4});
ValidatePages({0, 10, 20, 50, 70}, {{9, 10}, {69, 70}}, 100, {0, 1, 3, 4});
ValidatePages({0, 10, 20, 50, 70}, {{9, 9}, {10, 10}, {50, 50}}, 100, {0, 1, 3});
ValidatePages({0}, {{0, 9LL + INT_MAX}}, 100LL + INT_MAX, {0});
ValidatePages({0LL, 10LL + INT_MAX, 20LL + INT_MAX, 50LL + INT_MAX, 70LL + INT_MAX},
{{0LL, 9LL + INT_MAX}}, 100LL + INT_MAX, {0});
ValidatePages({0LL, 10LL + UINT_MAX, 20LL + UINT_MAX, 50LL + UINT_MAX, 70LL + UINT_MAX},
{{0LL, 9LL + UINT_MAX}}, 100LL + UINT_MAX, {0});
// Error cases:
// Negative first row index.
ValidatePagesError({-1, 0, 10}, {{0, 10}}, 10, {0});
// First row index greater then number of rows.
ValidatePagesError({5, 10, 15}, {{0, 10}}, 10, {0});
// First row indexes are not in order.
ValidatePagesError({0, 5, 3}, {{0, 10}}, 10, {0});
// Row ranges don't overlap with pages.
ValidatePagesError({0, 5, 10}, {{15, 20}}, 12, {0});
}
}