blob: 9d74c607480dbb8514c3babecb7ce46a8569e3fb [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#include <glog/stl_logging.h>
#include <gtest/gtest.h>
#include <memory>
#include <unordered_set>
#include <string>
#include "kudu/gutil/strings/numbers.h"
#include "kudu/gutil/strings/split.h"
#include "kudu/gutil/strings/substitute.h"
#include "kudu/util/stopwatch.h"
#include "kudu/util/test_util.h"
#include "kudu/tablet/mock-rowsets.h"
#include "kudu/tablet/rowset.h"
#include "kudu/tablet/rowset_tree.h"
#include "kudu/tablet/compaction_policy.h"
using std::shared_ptr;
using std::unordered_set;
using std::string;
using std::vector;
namespace kudu {
namespace tablet {
// Simple test for budgeted compaction: with three rowsets which
// mostly overlap, and a high budget, they should all be selected.
TEST(TestCompactionPolicy, TestBudgetedSelection) {
  // Three mock rowsets, each constructed from two key strings.
  RowSetVector vec;
  vec.push_back(shared_ptr<RowSet>(new MockDiskRowSet("C", "c")));
  vec.push_back(shared_ptr<RowSet>(new MockDiskRowSet("B", "a")));
  vec.push_back(shared_ptr<RowSet>(new MockDiskRowSet("A", "b")));

  // The policy is only handed the tree, so the tree has to be loaded
  // with the rowsets above; a default-constructed tree would leave the
  // policy with nothing to pick from.
  RowSetTree tree;
  ASSERT_OK(tree.Reset(vec));

  const int kBudgetMb = 1000; // enough to select all
  BudgetedCompactionPolicy policy(kBudgetMb);

  // Outputs of PickRowSets(): the chosen rowsets and the solution quality.
  unordered_set<RowSet*> picked;
  double quality = 0;
  ASSERT_OK(policy.PickRowSets(tree, &picked, &quality, nullptr));
  ASSERT_EQ(3, picked.size());
  ASSERT_GE(quality, 1.0);
}
// Return the directory of the currently-running executable.
//
// Aborts the process (via CHECK_OK) if the executable's path cannot be
// determined.
static string GetExecutableDir() {
  string exec;
  // Ask the Env for the executable path; without this lookup 'exec' would
  // still be empty when passed to DirName().
  CHECK_OK(Env::Default()->GetExecutablePath(&exec));
  return DirName(exec);
}
// Load a tab-separated rowset description file named 'name', located in the
// same directory as the running executable, and build one MockDiskRowSet per
// data line.
//
// Each data line must hold exactly three tab-separated fields:
//   <size in MB> <min key> <max key>
// Empty lines and lines whose first character is '#' are ignored.
//
// CHECK-fails if the file cannot be read, if a line does not have three
// fields, or if the parsed size is less than 1 (an unparseable size falls
// back to -1 and therefore fails the same check).
static RowSetVector LoadFile(const string& name) {
  const string path = JoinPathSegments(GetExecutableDir(), name);

  faststring contents;
  CHECK_OK_PREPEND(ReadFileToString(Env::Default(), path, &contents),
                   strings::Substitute("Unable to load test data file $0", path));

  RowSetVector rowsets;
  vector<string> rows = strings::Split(contents.ToString(), "\n");
  for (const auto& row : rows) {
    // Skip blank lines and '#' comments.
    const bool is_data_row = !row.empty() && row[0] != '#';
    if (!is_data_row) {
      continue;
    }

    vector<string> columns = strings::Split(row, "\t");
    CHECK_EQ(3, columns.size()) << "Expected 3 fields on line: " << row;

    int megabytes = ParseLeadingInt32Value(columns[0], -1);
    CHECK_GE(megabytes, 1) << "Expected size at least 1MB on line: " << row;

    // The file stores sizes in MB; the mock rowset is given bytes.
    rowsets.emplace_back(new MockDiskRowSet(columns[1] /* min key */,
                                            columns[2] /* max key */,
                                            megabytes * 1024 * 1024));
  }
  return rowsets;
}
// Realistic test using data scraped from a tablet containing 200+GB of YCSB data.
// This test can be used as a benchmark for optimizing the compaction policy,
// and also serves as a basic regression/stress test using real data.
TEST(TestCompactionPolicy, TestYcsbCompaction) {
  RowSetVector vec = LoadFile("ycsb-test-rowsets.tsv");

  // The policy is only handed the tree, so it must be loaded with the
  // rowsets read from the data file before any selection is attempted.
  RowSetTree tree;
  ASSERT_OK(tree.Reset(vec));

  // Quality reported for each budget, in increasing-budget order.
  vector<double> qualities;
  for (int budget_mb : {128, 256, 512, 1024}) {
    BudgetedCompactionPolicy policy(budget_mb);

    unordered_set<RowSet*> picked;
    double quality = 0;
    LOG_TIMING(INFO, strings::Substitute("Computing compaction with $0MB budget", budget_mb)) {
      ASSERT_OK(policy.PickRowSets(tree, &picked, &quality, nullptr));
    }
    LOG(INFO) << "quality=" << quality;

    // The selection must fit within the budget. Each rowset's size is
    // truncated to whole MB before being summed.
    int total_size = 0;
    for (const auto* rs : picked) {
      total_size += rs->EstimateOnDiskSize() / 1024 / 1024;
    }
    ASSERT_LE(total_size, budget_mb);

    // Record the result so the monotonicity check below has data to examine;
    // on an empty vector is_sorted() would pass trivially.
    qualities.push_back(quality);
  }

  // Given increasing budgets, our solutions should also be higher quality.
  ASSERT_TRUE(std::is_sorted(qualities.begin(), qualities.end()))
      << qualities;
}
} // namespace tablet
} // namespace kudu