| // Licensed to the Apache Software Foundation (ASF) under one |
| // or more contributor license agreements. See the NOTICE file |
| // distributed with this work for additional information |
| // regarding copyright ownership. The ASF licenses this file |
| // to you under the Apache License, Version 2.0 (the |
| // "License"); you may not use this file except in compliance |
| // with the License. You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, |
| // software distributed under the License is distributed on an |
| // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| // KIND, either express or implied. See the License for the |
| // specific language governing permissions and limitations |
| // under the License. |
| |
| #include "kudu/tablet/compaction_policy.h" |
| |
| #include <algorithm> |
| #include <initializer_list> |
| #include <limits> |
| #include <memory> |
| #include <ostream> |
| #include <string> |
| #include <utility> |
| #include <vector> |
| |
| #include <gflags/gflags_declare.h> |
| #include <glog/logging.h> |
| #include <glog/stl_logging.h> |
| #include <gtest/gtest.h> |
| |
| #include "kudu/gutil/stringprintf.h" |
| #include "kudu/gutil/strings/numbers.h" |
| #include "kudu/gutil/strings/split.h" |
| #include "kudu/gutil/strings/substitute.h" |
| #include "kudu/tablet/mock-rowsets.h" |
| #include "kudu/tablet/rowset.h" |
| #include "kudu/tablet/rowset_info.h" |
| #include "kudu/tablet/rowset_tree.h" |
| #include "kudu/util/env.h" |
| #include "kudu/util/faststring.h" |
| #include "kudu/util/path_util.h" |
| #include "kudu/util/status.h" |
| #include "kudu/util/stopwatch.h" |
| #include "kudu/util/test_macros.h" |
| #include "kudu/util/test_util.h" |
| |
| using std::string; |
| using std::vector; |
| |
| DECLARE_double(compaction_minimum_improvement); |
| DECLARE_double(compaction_small_rowset_tradeoff); |
| DECLARE_int64(budgeted_compaction_target_rowset_size); |
| |
| namespace kudu { |
| namespace tablet { |
| |
| class TestCompactionPolicy : public KuduTest { |
| protected: |
| void RunTestCase(const RowSetVector& vec, |
| int size_budget_mb, |
| CompactionSelection* picked, |
| double* quality) { |
| RowSetTree tree; |
| ASSERT_OK(tree.Reset(vec)); |
| |
| BudgetedCompactionPolicy policy(size_budget_mb); |
| |
| ASSERT_OK(policy.PickRowSets(tree, picked, quality, /*log=*/nullptr)); |
| } |
| }; |
| |
| // Simple test for budgeted compaction: with three rowsets which |
| // mostly overlap, and a high budget, they should all be selected. |
| TEST_F(TestCompactionPolicy, TestBudgetedSelection) { |
| // This tests the overlap-based part of compaction policy and not the |
| // rowset-size based policy. |
| FLAGS_compaction_small_rowset_tradeoff = 0.0; |
| |
| /* |
| * [C ------ c] |
| * [B ----- a] |
| * [A ------- b] |
| */ |
| const RowSetVector rowsets = { |
| std::make_shared<MockDiskRowSet>("C", "c"), |
| std::make_shared<MockDiskRowSet>("B", "a"), |
| std::make_shared<MockDiskRowSet>("A", "b") |
| }; |
| |
| constexpr auto kBudgetMb = 1000; // Enough to select all rowsets. |
| CompactionSelection picked; |
| double quality = 0.0; |
| NO_FATALS(RunTestCase(rowsets, kBudgetMb, &picked, &quality)); |
| ASSERT_EQ(rowsets.size(), picked.size()); |
| // Since all three rowsets are picked and each covers almost the entire key |
| // range, the sum of widths is about 3 while the union width is 1, so the |
| // quality score should be between 1 and 2. |
| ASSERT_GE(quality, 1.0); |
| ASSERT_LE(quality, 2.0); |
| } |
| |
| // Test for the case when we have many rowsets, but none of them |
| // overlap at all. This is likely to occur in workloads where the |
| // primary key is always increasing (such as a timestamp). |
| TEST_F(TestCompactionPolicy, TestNonOverlappingRowSets) { |
| // This tests the overlap-based part of compaction policy and not the |
| // rowset-size based policy. |
| FLAGS_compaction_small_rowset_tradeoff = 0.0; |
| |
| /* NB: Zero-padding of string keys omitted to save space. |
| * |
| * [0 - 1] [2 - 3] ... [198 - 199] |
| */ |
| const auto kNumRowSets = AllowSlowTests() ? 10000 : 100; |
| RowSetVector rowsets; |
| for (auto i = 0; i < kNumRowSets; i++) { |
| rowsets.emplace_back(new MockDiskRowSet( |
| StringPrintf("%010d", i * 2), |
| StringPrintf("%010d", i * 2 + 1))); |
| } |
| constexpr auto kBudgetMb = 128; |
| CompactionSelection picked; |
| double quality = 0.0; |
| NO_FATALS(RunTestCase(rowsets, kBudgetMb, &picked, &quality)); |
| ASSERT_EQ(0.0, quality); |
| ASSERT_TRUE(picked.empty()); |
| } |
| |
| // Similar to the above, but with some small overlap between adjacent |
| // rowsets. |
| TEST_F(TestCompactionPolicy, TestTinyOverlapRowSets) { |
| // This tests the overlap-based part of compaction policy and not the |
| // rowset-size based policy. |
| FLAGS_compaction_small_rowset_tradeoff = 0.0; |
| |
| /* NB: Zero-padding of string keys omitted to save space. |
| * |
| * [0 - 11000] |
| * [10000 - 21000] |
| * ... |
| * [990000 - 1001000] |
| */ |
| const auto kNumRowSets = AllowSlowTests() ? 10000 : 100; |
| RowSetVector rowsets; |
| for (auto i = 0; i < kNumRowSets; i++) { |
| rowsets.emplace_back(new MockDiskRowSet( |
| StringPrintf("%09d", i * 10000), |
| StringPrintf("%09d", i * 10000 + 11000))); |
| } |
| constexpr auto kBudgetMb = 128; |
| CompactionSelection picked; |
| double quality = 0; |
| NO_FATALS(RunTestCase(rowsets, kBudgetMb, &picked, &quality)); |
| // With such small overlaps, no compaction will be considered worthwhile. |
| ASSERT_EQ(0.0, quality); |
| ASSERT_TRUE(picked.empty()); |
| } |
| |
| // Test case with 100 rowsets, each of which overlaps with its two |
| // neighbors to the right. |
| TEST_F(TestCompactionPolicy, TestSignificantOverlap) { |
| // This tests the overlap-based part of compaction policy and not the |
| // rowset-size based policy. |
| FLAGS_compaction_small_rowset_tradeoff = 0.0; |
| |
| /* NB: Zero-padding of string keys omitted to save space. |
| * |
| * [0 ------ 20000] |
| * [10000 ------ 30000] |
| * [20000 ------ 40000] |
| * ... |
| * [990000 - 1010000] |
| */ |
| constexpr auto kNumRowSets = 100; |
| RowSetVector rowsets; |
| for (auto i = 0; i < kNumRowSets; i++) { |
| rowsets.emplace_back(new MockDiskRowSet( |
| StringPrintf("%010d", i * 10000), |
| StringPrintf("%010d", (i + 2) * 10000))); |
| } |
| constexpr auto kBudgetMb = 64; |
| CompactionSelection picked; |
| double quality = 0.0; |
| NO_FATALS(RunTestCase(rowsets, kBudgetMb, &picked, &quality)); |
| // Each rowset is 1MB so the number of rowsets picked should be the number of |
| // MB in the budget. |
| ASSERT_EQ(kBudgetMb, picked.size()); |
| |
| // In picking 64 of 100 equally-sized and -spaced rowsets, the union width is |
| // about 0.64. Each rowset intersects the next in half its width, which makes |
| // the sum of widths about 2 * 0.64 = 1.28, if we ignore the smaller overlap |
| // between the ith and (i + 2)th rowsets. So, the quality score should be |
| // around 0.64. |
| ASSERT_GT(quality, 0.5); |
| } |
| |
| // Test the adjustment we make to the quality measure that penalizes wider |
| // solutions that have almost the same quality score. For example, with no |
| // adjustment and inputs |
| // |
| // [ -- A -- ] |
| // [ -- B -- ] |
| // [ -- C -- ] |
| // |
| // compacting {A, B, C} results in the same reduction in average tablet height |
| // as compacting {A, B}, but uses more I/O. We'd like to choose {A, B} if |
| // A, B, and C are large, but {A, B, C} if A, B, and C are small. |
| TEST_F(TestCompactionPolicy, TestSupportAdjust) { |
| constexpr auto kBudgetMb = 1000; // Enough to select all rowsets. |
| CompactionSelection picked; |
| double quality = 0.0; |
| |
| // For big rowsets, compaction should favor just compacting the two that |
| // overlap. |
| const auto big_rowset_size = FLAGS_budgeted_compaction_target_rowset_size * 0.9; |
| const RowSetVector big_rowsets = { |
| std::make_shared<MockDiskRowSet>("A", "B", big_rowset_size), |
| std::make_shared<MockDiskRowSet>("A", "B", big_rowset_size), |
| std::make_shared<MockDiskRowSet>("B", "C", big_rowset_size), |
| }; |
| NO_FATALS(RunTestCase(big_rowsets, kBudgetMb, &picked, &quality)); |
| ASSERT_EQ(2, picked.size()); |
| // The widths of A and B are both 0.67, so with the adjustment the quality |
| // score should be a little less than 0.67. |
| ASSERT_GE(quality, 0.6); |
| |
| // For small rowsets, compaction should favor compacting all three. |
| const auto small_rowset_size = FLAGS_budgeted_compaction_target_rowset_size * 0.3; |
| const RowSetVector small_rowsets = { |
| std::make_shared<MockDiskRowSet>("A", "B", small_rowset_size), |
| std::make_shared<MockDiskRowSet>("A", "B", small_rowset_size), |
| std::make_shared<MockDiskRowSet>("B", "C", small_rowset_size), |
| }; |
| picked.clear(); |
| NO_FATALS(RunTestCase(small_rowsets, kBudgetMb, &picked, &quality)); |
| ASSERT_EQ(3, picked.size()); |
| ASSERT_GE(quality, 0.6); |
| } |
| |
| static RowSetVector LoadFile(const string& name) { |
| RowSetVector rowsets; |
| const string path = JoinPathSegments(GetTestExecutableDirectory(), name); |
| faststring data; |
| CHECK_OK_PREPEND(ReadFileToString(Env::Default(), path, &data), |
| strings::Substitute("unable to load test data file $0", path)); |
| const vector<string> lines = strings::Split(data.ToString(), "\n"); |
| for (const auto& line : lines) { |
| if (line.empty() || line[0] == '#') continue; |
| const vector<string> fields = strings::Split(line, "\t"); |
| CHECK_EQ(3, fields.size()) << "Expected 3 fields on line: " << line; |
| const int size_mb = ParseLeadingInt32Value(fields[0], -1); |
| CHECK_GE(size_mb, 1) << "Expected size at least 1MB on line: " << line; |
| rowsets.emplace_back(new MockDiskRowSet(fields[1] /* min key */, |
| fields[2] /* max key */, |
| size_mb * 1024 * 1024)); |
| } |
| |
| return rowsets; |
| } |
| |
| // Realistic test using data scraped from a tablet containing 200GB+ of YCSB |
| // data. This test can be used as a benchmark for optimizing the compaction |
| // policy, and also serves as a basic regression/stress test using real data. |
| TEST_F(TestCompactionPolicy, TestYcsbCompaction) { |
| SKIP_IF_SLOW_NOT_ALLOWED(); |
| const RowSetVector rowsets = LoadFile("testdata/ycsb-test-rowsets.tsv"); |
| RowSetTree tree; |
| ASSERT_OK(tree.Reset(rowsets)); |
| vector<double> qualities; |
| for (int budget_mb : {128, 256, 512, 1024}) { |
| BudgetedCompactionPolicy policy(budget_mb); |
| |
| CompactionSelection picked; |
| double quality = 0.0; |
| LOG_TIMING(INFO, strings::Substitute("computing compaction with $0MB budget", |
| budget_mb)) { |
| ASSERT_OK(policy.PickRowSets(tree, &picked, &quality, /*log=*/nullptr)); |
| } |
| LOG(INFO) << "quality=" << quality; |
| int total_size = 0; |
| for (const auto* rs : picked) { |
| total_size += rs->OnDiskBaseDataSizeWithRedos() / 1024 / 1024; |
| } |
| ASSERT_LE(total_size, budget_mb); |
| qualities.push_back(quality); |
| } |
| |
| // Since the budget increased in each iteration, the qualities should be |
| // non-decreasing. |
| ASSERT_TRUE(std::is_sorted(qualities.begin(), qualities.end())) << qualities; |
| } |
| |
| // Regression test for KUDU-2251 which ensures that large (> 2GiB) rowsets don't |
| // cause integer overflow in compaction planning. |
| TEST_F(TestCompactionPolicy, KUDU2251) { |
| // Raise the target size to keep the quality score in the same range as for |
| // "normal size" rowsets. |
| FLAGS_budgeted_compaction_target_rowset_size = 1L << 34; |
| |
| // Same arrangement as in TestBudgetedSelection. |
| const RowSetVector rowsets = { |
| std::make_shared<MockDiskRowSet>("C", "c", 1L << 31), |
| std::make_shared<MockDiskRowSet>("B", "a", 1L << 32), |
| std::make_shared<MockDiskRowSet>("A", "b", 1L << 33) |
| }; |
| |
| constexpr auto kBudgetMb = 1L << 30; // Enough to select all rowsets. |
| CompactionSelection picked; |
| double quality = 0.0; |
| NO_FATALS(RunTestCase(rowsets, kBudgetMb, &picked, &quality)); |
| ASSERT_EQ(rowsets.size(), picked.size()); |
| ASSERT_GT(quality, 1.0); |
| ASSERT_LT(quality, 2.0); |
| } |
| |
| // Test that we don't compact together non-overlapping rowsets if doing so |
| // doesn't actually reduce the number of rowsets. |
| TEST_F(TestCompactionPolicy, TestSmallRowsetCompactionReducesRowsetCount) { |
| constexpr auto kBudgetMb = 1000000; // Enough to select all rowsets in all cases. |
| const auto num_rowsets = 100; |
| // Use rowsets close enough to the target size that 'num_rowsets' count of |
| // them compacted together does not reduce the count of rowsets. |
| const auto rowset_size = |
| FLAGS_budgeted_compaction_target_rowset_size * (1 - 1.0 / (num_rowsets - 1)); |
| RowSetVector rowsets; |
| CompactionSelection picked; |
| double quality = 0.0; |
| for (int i = 0; i < num_rowsets; i++) { |
| picked.clear(); |
| |
| // [0 - 1][1 - 2] ... [98 - 99][99 - 100] built up over time. |
| rowsets.push_back(std::make_shared<MockDiskRowSet>( |
| StringPrintf("%010d", i), |
| StringPrintf("%010d", i + 1), |
| rowset_size)); |
| NO_FATALS(RunTestCase(rowsets, kBudgetMb, &picked, &quality)); |
| |
| // There should never be a worthwhile compaction. |
| ASSERT_TRUE(picked.empty()); |
| ASSERT_EQ(0.0, quality); |
| } |
| } |
| |
| // Test that the score of compacting two rowsets decreases monotonically as the |
| // size of the two rowsets increases, and that the tradeoff factor is set |
| // correctly so that a compaction actually reduce the count of rowsets. |
| TEST_F(TestCompactionPolicy, TestSmallRowsetTradeoffFactor) { |
| constexpr auto kBudgetMb = 1000; // Enough to select all rowsets in all cases. |
| CompactionSelection picked; |
| double quality = 0.0; |
| double prev_quality = std::numeric_limits<double>::max(); |
| |
| // Compact two equal-sized rowsets that are various fractions of the target |
| // rowset size. |
| const double target_size_bytes = |
| FLAGS_budgeted_compaction_target_rowset_size; |
| for (const auto divisor : { 32, 16, 8, 4, 2, 1 }) { |
| SCOPED_TRACE(strings::Substitute("divisor = $0", divisor)); |
| picked.clear(); |
| const auto size_bytes = target_size_bytes / divisor; |
| /* |
| * [A -- B][B -- C] |
| */ |
| const RowSetVector rowsets = { |
| std::make_shared<MockDiskRowSet>("A", "B", size_bytes), |
| std::make_shared<MockDiskRowSet>("B", "C", size_bytes), |
| }; |
| NO_FATALS(RunTestCase(rowsets, kBudgetMb, &picked, &quality)); |
| |
| // The quality score should monotonically decrease. |
| ASSERT_LE(quality, prev_quality); |
| prev_quality = quality; |
| |
| // As a spot check of the prioritization, we should stop wanting to compact |
| // when the divisor is two or one. |
| if (divisor > 2) { |
| ASSERT_EQ(rowsets.size(), picked.size()); |
| } else { |
| ASSERT_LT(quality, FLAGS_compaction_minimum_improvement); |
| ASSERT_TRUE(picked.empty()); |
| } |
| } |
| |
| // However, compacting a target rowset size's worth of data with it split |
| // between more than two rowsets should result in a compaction. |
| const auto size_bytes = target_size_bytes / 3; |
| /* |
| * [A -- B][B -- C][C -- D] |
| */ |
| const RowSetVector rowsets = { |
| std::make_shared<MockDiskRowSet>("A", "B", size_bytes), |
| std::make_shared<MockDiskRowSet>("B", "C", size_bytes), |
| std::make_shared<MockDiskRowSet>("C", "D", size_bytes), |
| }; |
| NO_FATALS(RunTestCase(rowsets, kBudgetMb, &picked, &quality)); |
| ASSERT_EQ(rowsets.size(), picked.size()); |
| ASSERT_GT(quality, FLAGS_compaction_minimum_improvement); |
| } |
| |
| // Compaction policy is designed so that height-reducing compactions should |
| // generally take priority over size-based compactions. So, in particular, |
| // given a layout like |
| // |
| // [ -- 32MiB -- ] [ 8MiB ][ 8 MiB ][8 MiB ][ 8MiB ] |
| // [ -- 32MiB -- ] |
| // |
| // and a budget of 64MiB, the policy should choose to compact the two |
| // overlapping rowsets despite the fact that that selection has a size-score |
| // of 0, while compacting one or more of the 8MiB rowsets has a big size score. |
| TEST_F(TestCompactionPolicy, TestHeightBasedDominatesSizeBased) { |
| constexpr auto kBigRowSetSizeBytes = 32 * 1024 * 1024; |
| constexpr auto kSmallRowSetSizeBytes = 8 * 1024 * 1024; |
| constexpr auto kBudgetMb = 64; |
| const RowSetVector rowsets = { |
| std::make_shared<MockDiskRowSet>("A", "B", kBigRowSetSizeBytes), |
| std::make_shared<MockDiskRowSet>("A", "B", kBigRowSetSizeBytes), |
| std::make_shared<MockDiskRowSet>("C", "D", kSmallRowSetSizeBytes), |
| std::make_shared<MockDiskRowSet>("D", "E", kSmallRowSetSizeBytes), |
| std::make_shared<MockDiskRowSet>("E", "F", kSmallRowSetSizeBytes), |
| std::make_shared<MockDiskRowSet>("F", "G", kSmallRowSetSizeBytes), |
| }; |
| |
| CompactionSelection picked; |
| double quality = 0.0; |
| NO_FATALS(RunTestCase(rowsets, kBudgetMb, &picked, &quality)); |
| ASSERT_EQ(2, picked.size()); |
| for (const auto* rowset : picked) { |
| ASSERT_EQ(kBigRowSetSizeBytes, rowset->OnDiskSize()); |
| } |
| } |
| |
| namespace { |
| double ComputeAverageRowsetHeight( |
| const vector<std::pair<string, string>>& intervals) { |
| RowSetVector rowsets; |
| for (const auto& interval : intervals) { |
| rowsets.push_back(std::make_shared<MockDiskRowSet>(interval.first, |
| interval.second)); |
| } |
| RowSetTree tree; |
| CHECK_OK(tree.Reset(rowsets)); |
| |
| double rowset_total_height, rowset_total_width; |
| RowSetInfo::ComputeCdfAndCollectOrdered(tree, |
| &rowset_total_height, |
| &rowset_total_width, |
| nullptr, |
| nullptr); |
| return rowset_total_width > 0 ? rowset_total_height / rowset_total_width : 0.0; |
| } |
| } // anonymous namespace |
| |
| class KeySpaceCdfTest : public KuduTest { |
| protected: |
| static void AssertWithinEpsilon(double epsilon, |
| double expected, |
| double actual) { |
| ASSERT_GE(actual, expected - epsilon); |
| ASSERT_LE(actual, expected + epsilon); |
| } |
| }; |
| |
| // Test the computation of average rowset heights. This isn't strictly used in |
| // compaction policy, but this is a convenient place to put it, for now. |
| TEST_F(KeySpaceCdfTest, TestComputingAverageRowSetHeight) { |
| // No rowsets. |
| EXPECT_EQ(0.0, ComputeAverageRowsetHeight({ })); |
| |
| /* A rowset that's one key wide. |
| * | |
| */ |
| EXPECT_EQ(0.0, ComputeAverageRowsetHeight({ { "A", "A" } })); |
| |
| /* A single rowset. |
| * [ --- ] |
| */ |
| EXPECT_EQ(1.0, ComputeAverageRowsetHeight({ { "A", "B" } })); |
| |
| /* Two rowsets with no empty space between. |
| * [ --- ][ --- ] |
| */ |
| EXPECT_EQ(1.0, ComputeAverageRowsetHeight({ { "A", "B" }, { "B", "C" } })); |
| |
| |
| /* Three rowsets with no empty spaces between. |
| * [ --- ][ --- ][ --- ] |
| */ |
| EXPECT_EQ(1.0, ComputeAverageRowsetHeight({ { "A", "B" }, |
| { "B", "C" }, |
| { "C", "D" } })); |
| |
| /* Two rowsets with empty space between them. |
| * [ --- ] [ --- ] |
| */ |
| EXPECT_EQ(1.0, ComputeAverageRowsetHeight({ { "A", "B" }, { "C", "D" } })); |
| |
| /* Three rowsets with empty space between them. |
| * [ --- ] [ --- ] [ --- ] |
| */ |
| EXPECT_EQ(1.0, ComputeAverageRowsetHeight({ { "A", "B" }, |
| { "C", "D" }, |
| { "E", "F" } })); |
| |
| /* Three rowsets with empty space between them, and one is a single key. |
| * [ --- ] | [ --- ] |
| */ |
| EXPECT_EQ(1.0, ComputeAverageRowsetHeight({ { "A", "B" }, |
| { "C", "C" }, |
| { "D", "D" } })); |
| |
| /* Two rowsets that completely overlap. |
| * [ --- ] |
| * [ --- ] |
| */ |
| EXPECT_EQ(2.0, ComputeAverageRowsetHeight({ { "A", "B" }, { "A", "B" } })); |
| |
| /* Three rowsets that completely overlap, but two are single keys, and the |
| * overlaps are on the boundaries. |
| * [ --- ] |
| * | | |
| */ |
| EXPECT_EQ(1.0, ComputeAverageRowsetHeight({ { "A", "B" }, |
| { "A", "A" }, |
| { "B", "B" } })); |
| |
| /* Three rowsets that completely overlap. |
| * [ --- ] |
| * [ --- ] |
| * [ --- ] |
| */ |
| EXPECT_EQ(3.0, ComputeAverageRowsetHeight({ { "A", "B" }, |
| { "A", "B" }, |
| { "A", "B" } })); |
| |
| /* Three rowsets that completely overlap, but one is a single key. |
| * [ --- ] |
| * [ --- ] |
| * | |
| */ |
| EXPECT_EQ(2.0, ComputeAverageRowsetHeight({ { "A", "C" }, |
| { "A", "C" }, |
| { "B", "B" } })); |
| |
| /* Two rowsets that partially overlap. |
| * [ --- ] |
| * [ --- ] |
| */ |
| EXPECT_EQ(1.5, ComputeAverageRowsetHeight({ { "A", "C" }, { "B", "D" } })); |
| |
| // Now the numbers stop being round so we'll check for being within some small |
| // range of an expected number. |
| constexpr auto epsilon = 0.001; |
| |
| /* Three rowsets that partially overlap. |
| * [ --- ] |
| * [ --- ] |
| * [ --- ] |
| */ |
| AssertWithinEpsilon(epsilon, |
| 1.66667, |
| ComputeAverageRowsetHeight({ { "A", "C" }, |
| { "B", "D" }, |
| { "C", "E" } })); |
| |
| /* Three rowsets that partially overlap. |
| * [ --- ] |
| * [ --- ] |
| * [ --- ] |
| */ |
| AssertWithinEpsilon(epsilon, |
| 2.33333, |
| ComputeAverageRowsetHeight({ { "A", "C" }, |
| { "B", "D" }, |
| { "A", "C" } })); |
| |
| /* Five rowsets that partially overlap. |
| * [ --- ][ --- ] |
| * [ --- ] |
| * [ --- ][ --- ] |
| */ |
| AssertWithinEpsilon(epsilon, |
| 2.6, |
| ComputeAverageRowsetHeight({ { "A", "C" }, { "C", "E" }, |
| { "B", "D" }, |
| { "A", "C" }, { "C", "E" } })); |
| |
| /* A messy collection of rowsets overlapping in all kinds of ways. |
| * [ --- ][ --- ] [ --- ] [ --- ] |
| * [ --- ] [ --- ] |
| * [ --- ] | [ --- ] | |
| */ |
| AssertWithinEpsilon( |
| epsilon, |
| 2.3125, |
| ComputeAverageRowsetHeight( |
| { { "A", "C" }, { "C", "E" }, { "F", "G" }, { "H", "J" }, |
| { "B", "D" }, { "F", "I" }, |
| { "A", "C" }, { "D", "D" }, { "F", "G" }, { "I", "I" } })); |
| } |
| |
| // A regression test for KUDU-2704. If rowsets are larger than the target size |
| // but there is fruitful height-based compaction that is within budget, it |
| // might not occur because the size-based portion of the rowset's value is |
| // negative. |
| TEST_F(TestCompactionPolicy, TestKUDU2704) { |
| CompactionSelection picked; |
| double quality = 0.0; |
| |
| constexpr auto kNumRowSets = 100; |
| const auto big_rowset_size = FLAGS_budgeted_compaction_target_rowset_size * 2; |
| const auto budget_mb = 2 * big_rowset_size / 1024 / 1024; |
| RowSetVector rowsets; |
| for (auto i = 0; i < kNumRowSets; i++) { |
| rowsets.emplace_back(new MockDiskRowSet( |
| StringPrintf("%010d", i * 2), |
| StringPrintf("%010d", i * 2 + 1), |
| big_rowset_size)); |
| } |
| // Add one rowset overlapping the first rowset, so that it decreases average |
| // rowset height to compact this one and the first one, but the average rowset |
| // height doesn't decrease by much. |
| rowsets.emplace_back(new MockDiskRowSet("0000000000", |
| "0000000001", |
| big_rowset_size)); |
| NO_FATALS(RunTestCase(rowsets, budget_mb, &picked, &quality)); |
| |
| // We should still pick two rowsets because we do not allow the above-target |
| // size of the rowsets to hurt compaction based on average height reduction. |
| ASSERT_EQ(2, picked.size()); |
| ASSERT_GT(quality, 0.0); |
| } |
| } // namespace tablet |
| } // namespace kudu |