blob: 193101d460d83229167eb5aece1cc28fcb71ac1a [file] [log] [blame]
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
//
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
// Introduction of SyncPoint effectively disabled building and running this test
// in Release build.
// which is a pity, it is a good test
#include <fcntl.h>
#include <algorithm>
#include <set>
#include <thread>
#include <unordered_set>
#include <utility>
#ifndef OS_WIN
#include <unistd.h>
#endif
#ifdef OS_SOLARIS
#include <alloca.h>
#endif
#include "cache/lru_cache.h"
#include "db/db_impl.h"
#include "db/db_test_util.h"
#include "db/dbformat.h"
#include "db/job_context.h"
#include "db/version_set.h"
#include "db/write_batch_internal.h"
#include "env/mock_env.h"
#include "memtable/hash_linklist_rep.h"
#include "monitoring/thread_status_util.h"
#include "port/port.h"
#include "port/stack_trace.h"
#include "rocksdb/cache.h"
#include "rocksdb/compaction_filter.h"
#include "rocksdb/convenience.h"
#include "rocksdb/db.h"
#include "rocksdb/env.h"
#include "rocksdb/experimental.h"
#include "rocksdb/filter_policy.h"
#include "rocksdb/options.h"
#include "rocksdb/perf_context.h"
#include "rocksdb/slice.h"
#include "rocksdb/slice_transform.h"
#include "rocksdb/snapshot.h"
#include "rocksdb/table.h"
#include "rocksdb/table_properties.h"
#include "rocksdb/thread_status.h"
#include "rocksdb/utilities/checkpoint.h"
#include "rocksdb/utilities/optimistic_transaction_db.h"
#include "rocksdb/utilities/write_batch_with_index.h"
#include "table/block_based_table_factory.h"
#include "table/mock_table.h"
#include "table/plain_table_factory.h"
#include "table/scoped_arena_iterator.h"
#include "util/compression.h"
#include "util/file_reader_writer.h"
#include "util/filename.h"
#include "util/hash.h"
#include "util/mutexlock.h"
#include "util/rate_limiter.h"
#include "util/string_util.h"
#include "util/sync_point.h"
#include "util/testharness.h"
#include "util/testutil.h"
#include "utilities/merge_operators.h"
namespace rocksdb {
class DBTest : public DBTestBase {
public:
DBTest() : DBTestBase("/db_test") {}
};
class DBTestWithParam
: public DBTest,
public testing::WithParamInterface<std::tuple<uint32_t, bool>> {
public:
DBTestWithParam() {
max_subcompactions_ = std::get<0>(GetParam());
exclusive_manual_compaction_ = std::get<1>(GetParam());
}
// Required if inheriting from testing::WithParamInterface<>
static void SetUpTestCase() {}
static void TearDownTestCase() {}
uint32_t max_subcompactions_;
bool exclusive_manual_compaction_;
};
TEST_F(DBTest, MockEnvTest) {
unique_ptr<MockEnv> env{new MockEnv(Env::Default())};
Options options;
options.create_if_missing = true;
options.env = env.get();
DB* db;
const Slice keys[] = {Slice("aaa"), Slice("bbb"), Slice("ccc")};
const Slice vals[] = {Slice("foo"), Slice("bar"), Slice("baz")};
ASSERT_OK(DB::Open(options, "/dir/db", &db));
for (size_t i = 0; i < 3; ++i) {
ASSERT_OK(db->Put(WriteOptions(), keys[i], vals[i]));
}
for (size_t i = 0; i < 3; ++i) {
std::string res;
ASSERT_OK(db->Get(ReadOptions(), keys[i], &res));
ASSERT_TRUE(res == vals[i]);
}
Iterator* iterator = db->NewIterator(ReadOptions());
iterator->SeekToFirst();
for (size_t i = 0; i < 3; ++i) {
ASSERT_TRUE(iterator->Valid());
ASSERT_TRUE(keys[i] == iterator->key());
ASSERT_TRUE(vals[i] == iterator->value());
iterator->Next();
}
ASSERT_TRUE(!iterator->Valid());
delete iterator;
// TEST_FlushMemTable() is not supported in ROCKSDB_LITE
#ifndef ROCKSDB_LITE
DBImpl* dbi = reinterpret_cast<DBImpl*>(db);
ASSERT_OK(dbi->TEST_FlushMemTable());
for (size_t i = 0; i < 3; ++i) {
std::string res;
ASSERT_OK(db->Get(ReadOptions(), keys[i], &res));
ASSERT_TRUE(res == vals[i]);
}
#endif // ROCKSDB_LITE
delete db;
}
// NewMemEnv returns nullptr in ROCKSDB_LITE since class InMemoryEnv isn't
// defined.
#ifndef ROCKSDB_LITE
TEST_F(DBTest, MemEnvTest) {
unique_ptr<Env> env{NewMemEnv(Env::Default())};
Options options;
options.create_if_missing = true;
options.env = env.get();
DB* db;
const Slice keys[] = {Slice("aaa"), Slice("bbb"), Slice("ccc")};
const Slice vals[] = {Slice("foo"), Slice("bar"), Slice("baz")};
ASSERT_OK(DB::Open(options, "/dir/db", &db));
for (size_t i = 0; i < 3; ++i) {
ASSERT_OK(db->Put(WriteOptions(), keys[i], vals[i]));
}
for (size_t i = 0; i < 3; ++i) {
std::string res;
ASSERT_OK(db->Get(ReadOptions(), keys[i], &res));
ASSERT_TRUE(res == vals[i]);
}
Iterator* iterator = db->NewIterator(ReadOptions());
iterator->SeekToFirst();
for (size_t i = 0; i < 3; ++i) {
ASSERT_TRUE(iterator->Valid());
ASSERT_TRUE(keys[i] == iterator->key());
ASSERT_TRUE(vals[i] == iterator->value());
iterator->Next();
}
ASSERT_TRUE(!iterator->Valid());
delete iterator;
DBImpl* dbi = reinterpret_cast<DBImpl*>(db);
ASSERT_OK(dbi->TEST_FlushMemTable());
for (size_t i = 0; i < 3; ++i) {
std::string res;
ASSERT_OK(db->Get(ReadOptions(), keys[i], &res));
ASSERT_TRUE(res == vals[i]);
}
delete db;
options.create_if_missing = false;
ASSERT_OK(DB::Open(options, "/dir/db", &db));
for (size_t i = 0; i < 3; ++i) {
std::string res;
ASSERT_OK(db->Get(ReadOptions(), keys[i], &res));
ASSERT_TRUE(res == vals[i]);
}
delete db;
}
#endif // ROCKSDB_LITE
TEST_F(DBTest, WriteEmptyBatch) {
Options options = CurrentOptions();
options.env = env_;
options.write_buffer_size = 100000;
CreateAndReopenWithCF({"pikachu"}, options);
ASSERT_OK(Put(1, "foo", "bar"));
WriteOptions wo;
wo.sync = true;
wo.disableWAL = false;
WriteBatch empty_batch;
ASSERT_OK(dbfull()->Write(wo, &empty_batch));
// make sure we can re-open it.
ASSERT_OK(TryReopenWithColumnFamilies({"default", "pikachu"}, options));
ASSERT_EQ("bar", Get(1, "foo"));
}
TEST_F(DBTest, SkipDelay) {
Options options = CurrentOptions();
options.env = env_;
options.write_buffer_size = 100000;
CreateAndReopenWithCF({"pikachu"}, options);
for (bool sync : {true, false}) {
for (bool disableWAL : {true, false}) {
// Use a small number to ensure a large delay that is still effective
// when we do Put
// TODO(myabandeh): this is time dependent and could potentially make
// the test flaky
auto token = dbfull()->TEST_write_controler().GetDelayToken(1);
std::atomic<int> sleep_count(0);
rocksdb::SyncPoint::GetInstance()->SetCallBack(
"DBImpl::DelayWrite:Sleep",
[&](void* arg) { sleep_count.fetch_add(1); });
std::atomic<int> wait_count(0);
rocksdb::SyncPoint::GetInstance()->SetCallBack(
"DBImpl::DelayWrite:Wait",
[&](void* arg) { wait_count.fetch_add(1); });
rocksdb::SyncPoint::GetInstance()->EnableProcessing();
WriteOptions wo;
wo.sync = sync;
wo.disableWAL = disableWAL;
wo.no_slowdown = true;
dbfull()->Put(wo, "foo", "bar");
// We need the 2nd write to trigger delay. This is because delay is
// estimated based on the last write size which is 0 for the first write.
ASSERT_NOK(dbfull()->Put(wo, "foo2", "bar2"));
ASSERT_GE(sleep_count.load(), 0);
ASSERT_GE(wait_count.load(), 0);
token.reset();
token = dbfull()->TEST_write_controler().GetDelayToken(1000000000);
wo.no_slowdown = false;
ASSERT_OK(dbfull()->Put(wo, "foo3", "bar3"));
ASSERT_GE(sleep_count.load(), 1);
token.reset();
}
}
}
#ifndef ROCKSDB_LITE
TEST_F(DBTest, LevelLimitReopen) {
Options options = CurrentOptions();
CreateAndReopenWithCF({"pikachu"}, options);
const std::string value(1024 * 1024, ' ');
int i = 0;
while (NumTableFilesAtLevel(2, 1) == 0) {
ASSERT_OK(Put(1, Key(i++), value));
}
options.num_levels = 1;
options.max_bytes_for_level_multiplier_additional.resize(1, 1);
Status s = TryReopenWithColumnFamilies({"default", "pikachu"}, options);
ASSERT_EQ(s.IsInvalidArgument(), true);
ASSERT_EQ(s.ToString(),
"Invalid argument: db has more levels than options.num_levels");
options.num_levels = 10;
options.max_bytes_for_level_multiplier_additional.resize(10, 1);
ASSERT_OK(TryReopenWithColumnFamilies({"default", "pikachu"}, options));
}
#endif // ROCKSDB_LITE
TEST_F(DBTest, PutSingleDeleteGet) {
do {
CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
ASSERT_OK(Put(1, "foo", "v1"));
ASSERT_EQ("v1", Get(1, "foo"));
ASSERT_OK(Put(1, "foo2", "v2"));
ASSERT_EQ("v2", Get(1, "foo2"));
ASSERT_OK(SingleDelete(1, "foo"));
ASSERT_EQ("NOT_FOUND", Get(1, "foo"));
// Skip HashCuckooRep as it does not support single delete. FIFO and
// universal compaction do not apply to the test case. Skip MergePut
// because single delete does not get removed when it encounters a merge.
} while (ChangeOptions(kSkipHashCuckoo | kSkipFIFOCompaction |
kSkipUniversalCompaction | kSkipMergePut));
}
TEST_F(DBTest, ReadFromPersistedTier) {
do {
Random rnd(301);
Options options = CurrentOptions();
for (int disableWAL = 0; disableWAL <= 1; ++disableWAL) {
CreateAndReopenWithCF({"pikachu"}, options);
WriteOptions wopt;
wopt.disableWAL = (disableWAL == 1);
// 1st round: put but not flush
ASSERT_OK(db_->Put(wopt, handles_[1], "foo", "first"));
ASSERT_OK(db_->Put(wopt, handles_[1], "bar", "one"));
ASSERT_EQ("first", Get(1, "foo"));
ASSERT_EQ("one", Get(1, "bar"));
// Read directly from persited data.
ReadOptions ropt;
ropt.read_tier = kPersistedTier;
std::string value;
if (wopt.disableWAL) {
// as data has not yet being flushed, we expect not found.
ASSERT_TRUE(db_->Get(ropt, handles_[1], "foo", &value).IsNotFound());
ASSERT_TRUE(db_->Get(ropt, handles_[1], "bar", &value).IsNotFound());
} else {
ASSERT_OK(db_->Get(ropt, handles_[1], "foo", &value));
ASSERT_OK(db_->Get(ropt, handles_[1], "bar", &value));
}
// Multiget
std::vector<ColumnFamilyHandle*> multiget_cfs;
multiget_cfs.push_back(handles_[1]);
multiget_cfs.push_back(handles_[1]);
std::vector<Slice> multiget_keys;
multiget_keys.push_back("foo");
multiget_keys.push_back("bar");
std::vector<std::string> multiget_values;
auto statuses =
db_->MultiGet(ropt, multiget_cfs, multiget_keys, &multiget_values);
if (wopt.disableWAL) {
ASSERT_TRUE(statuses[0].IsNotFound());
ASSERT_TRUE(statuses[1].IsNotFound());
} else {
ASSERT_OK(statuses[0]);
ASSERT_OK(statuses[1]);
}
// 2nd round: flush and put a new value in memtable.
ASSERT_OK(Flush(1));
ASSERT_OK(db_->Put(wopt, handles_[1], "rocksdb", "hello"));
// once the data has been flushed, we are able to get the
// data when kPersistedTier is used.
ASSERT_TRUE(db_->Get(ropt, handles_[1], "foo", &value).ok());
ASSERT_EQ(value, "first");
ASSERT_TRUE(db_->Get(ropt, handles_[1], "bar", &value).ok());
ASSERT_EQ(value, "one");
if (wopt.disableWAL) {
ASSERT_TRUE(
db_->Get(ropt, handles_[1], "rocksdb", &value).IsNotFound());
} else {
ASSERT_OK(db_->Get(ropt, handles_[1], "rocksdb", &value));
ASSERT_EQ(value, "hello");
}
// Expect same result in multiget
multiget_cfs.push_back(handles_[1]);
multiget_keys.push_back("rocksdb");
statuses =
db_->MultiGet(ropt, multiget_cfs, multiget_keys, &multiget_values);
ASSERT_TRUE(statuses[0].ok());
ASSERT_EQ("first", multiget_values[0]);
ASSERT_TRUE(statuses[1].ok());
ASSERT_EQ("one", multiget_values[1]);
if (wopt.disableWAL) {
ASSERT_TRUE(statuses[2].IsNotFound());
} else {
ASSERT_OK(statuses[2]);
}
// 3rd round: delete and flush
ASSERT_OK(db_->Delete(wopt, handles_[1], "foo"));
Flush(1);
ASSERT_OK(db_->Delete(wopt, handles_[1], "bar"));
ASSERT_TRUE(db_->Get(ropt, handles_[1], "foo", &value).IsNotFound());
if (wopt.disableWAL) {
// Still expect finding the value as its delete has not yet being
// flushed.
ASSERT_TRUE(db_->Get(ropt, handles_[1], "bar", &value).ok());
ASSERT_EQ(value, "one");
} else {
ASSERT_TRUE(db_->Get(ropt, handles_[1], "bar", &value).IsNotFound());
}
ASSERT_TRUE(db_->Get(ropt, handles_[1], "rocksdb", &value).ok());
ASSERT_EQ(value, "hello");
statuses =
db_->MultiGet(ropt, multiget_cfs, multiget_keys, &multiget_values);
ASSERT_TRUE(statuses[0].IsNotFound());
if (wopt.disableWAL) {
ASSERT_TRUE(statuses[1].ok());
ASSERT_EQ("one", multiget_values[1]);
} else {
ASSERT_TRUE(statuses[1].IsNotFound());
}
ASSERT_TRUE(statuses[2].ok());
ASSERT_EQ("hello", multiget_values[2]);
if (wopt.disableWAL == 0) {
DestroyAndReopen(options);
}
}
} while (ChangeOptions(kSkipHashCuckoo));
}
TEST_F(DBTest, SingleDeleteFlush) {
// Test to check whether flushing preserves a single delete hidden
// behind a put.
do {
Random rnd(301);
Options options = CurrentOptions();
options.disable_auto_compactions = true;
CreateAndReopenWithCF({"pikachu"}, options);
// Put values on second level (so that they will not be in the same
// compaction as the other operations.
Put(1, "foo", "first");
Put(1, "bar", "one");
ASSERT_OK(Flush(1));
MoveFilesToLevel(2, 1);
// (Single) delete hidden by a put
SingleDelete(1, "foo");
Put(1, "foo", "second");
Delete(1, "bar");
Put(1, "bar", "two");
ASSERT_OK(Flush(1));
SingleDelete(1, "foo");
Delete(1, "bar");
ASSERT_OK(Flush(1));
dbfull()->CompactRange(CompactRangeOptions(), handles_[1], nullptr,
nullptr);
ASSERT_EQ("NOT_FOUND", Get(1, "bar"));
ASSERT_EQ("NOT_FOUND", Get(1, "foo"));
// Skip HashCuckooRep as it does not support single delete. FIFO and
// universal compaction do not apply to the test case. Skip MergePut
// because merges cannot be combined with single deletions.
} while (ChangeOptions(kSkipHashCuckoo | kSkipFIFOCompaction |
kSkipUniversalCompaction | kSkipMergePut));
}
TEST_F(DBTest, SingleDeletePutFlush) {
// Single deletes that encounter the matching put in a flush should get
// removed.
do {
Random rnd(301);
Options options = CurrentOptions();
options.disable_auto_compactions = true;
CreateAndReopenWithCF({"pikachu"}, options);
Put(1, "foo", Slice());
Put(1, "a", Slice());
SingleDelete(1, "a");
ASSERT_OK(Flush(1));
ASSERT_EQ("[ ]", AllEntriesFor("a", 1));
// Skip HashCuckooRep as it does not support single delete. FIFO and
// universal compaction do not apply to the test case. Skip MergePut
// because merges cannot be combined with single deletions.
} while (ChangeOptions(kSkipHashCuckoo | kSkipFIFOCompaction |
kSkipUniversalCompaction | kSkipMergePut));
}
// Disable because not all platform can run it.
// It requires more than 9GB memory to run it, With single allocation
// of more than 3GB.
TEST_F(DBTest, DISABLED_VeryLargeValue) {
const size_t kValueSize = 3221225472u; // 3GB value
const size_t kKeySize = 8388608u; // 8MB key
std::string raw(kValueSize, 'v');
std::string key1(kKeySize, 'c');
std::string key2(kKeySize, 'd');
Options options = CurrentOptions();
options.env = env_;
options.write_buffer_size = 100000; // Small write buffer
options.paranoid_checks = true;
DestroyAndReopen(options);
ASSERT_OK(Put("boo", "v1"));
ASSERT_OK(Put("foo", "v1"));
ASSERT_OK(Put(key1, raw));
raw[0] = 'w';
ASSERT_OK(Put(key2, raw));
dbfull()->TEST_WaitForFlushMemTable();
ASSERT_EQ(1, NumTableFilesAtLevel(0));
std::string value;
Status s = db_->Get(ReadOptions(), key1, &value);
ASSERT_OK(s);
ASSERT_EQ(kValueSize, value.size());
ASSERT_EQ('v', value[0]);
s = db_->Get(ReadOptions(), key2, &value);
ASSERT_OK(s);
ASSERT_EQ(kValueSize, value.size());
ASSERT_EQ('w', value[0]);
// Compact all files.
Flush();
db_->CompactRange(CompactRangeOptions(), nullptr, nullptr);
// Check DB is not in read-only state.
ASSERT_OK(Put("boo", "v1"));
s = db_->Get(ReadOptions(), key1, &value);
ASSERT_OK(s);
ASSERT_EQ(kValueSize, value.size());
ASSERT_EQ('v', value[0]);
s = db_->Get(ReadOptions(), key2, &value);
ASSERT_OK(s);
ASSERT_EQ(kValueSize, value.size());
ASSERT_EQ('w', value[0]);
}
TEST_F(DBTest, GetFromImmutableLayer) {
do {
Options options = CurrentOptions();
options.env = env_;
CreateAndReopenWithCF({"pikachu"}, options);
ASSERT_OK(Put(1, "foo", "v1"));
ASSERT_EQ("v1", Get(1, "foo"));
// Block sync calls
env_->delay_sstable_sync_.store(true, std::memory_order_release);
Put(1, "k1", std::string(100000, 'x')); // Fill memtable
Put(1, "k2", std::string(100000, 'y')); // Trigger flush
ASSERT_EQ("v1", Get(1, "foo"));
ASSERT_EQ("NOT_FOUND", Get(0, "foo"));
// Release sync calls
env_->delay_sstable_sync_.store(false, std::memory_order_release);
} while (ChangeOptions());
}
TEST_F(DBTest, GetLevel0Ordering) {
do {
CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
// Check that we process level-0 files in correct order. The code
// below generates two level-0 files where the earlier one comes
// before the later one in the level-0 file list since the earlier
// one has a smaller "smallest" key.
ASSERT_OK(Put(1, "bar", "b"));
ASSERT_OK(Put(1, "foo", "v1"));
ASSERT_OK(Flush(1));
ASSERT_OK(Put(1, "foo", "v2"));
ASSERT_OK(Flush(1));
ASSERT_EQ("v2", Get(1, "foo"));
} while (ChangeOptions());
}
TEST_F(DBTest, WrongLevel0Config) {
Options options = CurrentOptions();
Close();
ASSERT_OK(DestroyDB(dbname_, options));
options.level0_stop_writes_trigger = 1;
options.level0_slowdown_writes_trigger = 2;
options.level0_file_num_compaction_trigger = 3;
ASSERT_OK(DB::Open(options, dbname_, &db_));
}
#ifndef ROCKSDB_LITE
TEST_F(DBTest, GetOrderedByLevels) {
do {
CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
ASSERT_OK(Put(1, "foo", "v1"));
Compact(1, "a", "z");
ASSERT_EQ("v1", Get(1, "foo"));
ASSERT_OK(Put(1, "foo", "v2"));
ASSERT_EQ("v2", Get(1, "foo"));
ASSERT_OK(Flush(1));
ASSERT_EQ("v2", Get(1, "foo"));
} while (ChangeOptions());
}
TEST_F(DBTest, GetPicksCorrectFile) {
do {
CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
// Arrange to have multiple files in a non-level-0 level.
ASSERT_OK(Put(1, "a", "va"));
Compact(1, "a", "b");
ASSERT_OK(Put(1, "x", "vx"));
Compact(1, "x", "y");
ASSERT_OK(Put(1, "f", "vf"));
Compact(1, "f", "g");
ASSERT_EQ("va", Get(1, "a"));
ASSERT_EQ("vf", Get(1, "f"));
ASSERT_EQ("vx", Get(1, "x"));
} while (ChangeOptions());
}
TEST_F(DBTest, GetEncountersEmptyLevel) {
do {
Options options = CurrentOptions();
CreateAndReopenWithCF({"pikachu"}, options);
// Arrange for the following to happen:
// * sstable A in level 0
// * nothing in level 1
// * sstable B in level 2
// Then do enough Get() calls to arrange for an automatic compaction
// of sstable A. A bug would cause the compaction to be marked as
// occurring at level 1 (instead of the correct level 0).
// Step 1: First place sstables in levels 0 and 2
Put(1, "a", "begin");
Put(1, "z", "end");
ASSERT_OK(Flush(1));
dbfull()->TEST_CompactRange(0, nullptr, nullptr, handles_[1]);
dbfull()->TEST_CompactRange(1, nullptr, nullptr, handles_[1]);
Put(1, "a", "begin");
Put(1, "z", "end");
ASSERT_OK(Flush(1));
ASSERT_GT(NumTableFilesAtLevel(0, 1), 0);
ASSERT_GT(NumTableFilesAtLevel(2, 1), 0);
// Step 2: clear level 1 if necessary.
dbfull()->TEST_CompactRange(1, nullptr, nullptr, handles_[1]);
ASSERT_EQ(NumTableFilesAtLevel(0, 1), 1);
ASSERT_EQ(NumTableFilesAtLevel(1, 1), 0);
ASSERT_EQ(NumTableFilesAtLevel(2, 1), 1);
// Step 3: read a bunch of times
for (int i = 0; i < 1000; i++) {
ASSERT_EQ("NOT_FOUND", Get(1, "missing"));
}
// Step 4: Wait for compaction to finish
dbfull()->TEST_WaitForCompact();
ASSERT_EQ(NumTableFilesAtLevel(0, 1), 1); // XXX
} while (ChangeOptions(kSkipUniversalCompaction | kSkipFIFOCompaction));
}
#endif // ROCKSDB_LITE
TEST_F(DBTest, FlushMultipleMemtable) {
do {
Options options = CurrentOptions();
WriteOptions writeOpt = WriteOptions();
writeOpt.disableWAL = true;
options.max_write_buffer_number = 4;
options.min_write_buffer_number_to_merge = 3;
options.max_write_buffer_number_to_maintain = -1;
CreateAndReopenWithCF({"pikachu"}, options);
ASSERT_OK(dbfull()->Put(writeOpt, handles_[1], "foo", "v1"));
ASSERT_OK(Flush(1));
ASSERT_OK(dbfull()->Put(writeOpt, handles_[1], "bar", "v1"));
ASSERT_EQ("v1", Get(1, "foo"));
ASSERT_EQ("v1", Get(1, "bar"));
ASSERT_OK(Flush(1));
} while (ChangeCompactOptions());
}
#ifndef ROCKSDB_LITE
TEST_F(DBTest, FlushSchedule) {
Options options = CurrentOptions();
options.disable_auto_compactions = true;
options.level0_stop_writes_trigger = 1 << 10;
options.level0_slowdown_writes_trigger = 1 << 10;
options.min_write_buffer_number_to_merge = 1;
options.max_write_buffer_number_to_maintain = 1;
options.max_write_buffer_number = 2;
options.write_buffer_size = 120 * 1024;
CreateAndReopenWithCF({"pikachu"}, options);
std::vector<port::Thread> threads;
std::atomic<int> thread_num(0);
// each column family will have 5 thread, each thread generating 2 memtables.
// each column family should end up with 10 table files
std::function<void()> fill_memtable_func = [&]() {
int a = thread_num.fetch_add(1);
Random rnd(a);
WriteOptions wo;
// this should fill up 2 memtables
for (int k = 0; k < 5000; ++k) {
ASSERT_OK(db_->Put(wo, handles_[a & 1], RandomString(&rnd, 13), ""));
}
};
for (int i = 0; i < 10; ++i) {
threads.emplace_back(fill_memtable_func);
}
for (auto& t : threads) {
t.join();
}
auto default_tables = GetNumberOfSstFilesForColumnFamily(db_, "default");
auto pikachu_tables = GetNumberOfSstFilesForColumnFamily(db_, "pikachu");
ASSERT_LE(default_tables, static_cast<uint64_t>(10));
ASSERT_GT(default_tables, static_cast<uint64_t>(0));
ASSERT_LE(pikachu_tables, static_cast<uint64_t>(10));
ASSERT_GT(pikachu_tables, static_cast<uint64_t>(0));
}
#endif // ROCKSDB_LITE
namespace {
class KeepFilter : public CompactionFilter {
public:
virtual bool Filter(int level, const Slice& key, const Slice& value,
std::string* new_value,
bool* value_changed) const override {
return false;
}
virtual const char* Name() const override { return "KeepFilter"; }
};
class KeepFilterFactory : public CompactionFilterFactory {
public:
explicit KeepFilterFactory(bool check_context = false)
: check_context_(check_context) {}
virtual std::unique_ptr<CompactionFilter> CreateCompactionFilter(
const CompactionFilter::Context& context) override {
if (check_context_) {
EXPECT_EQ(expect_full_compaction_.load(), context.is_full_compaction);
EXPECT_EQ(expect_manual_compaction_.load(), context.is_manual_compaction);
}
return std::unique_ptr<CompactionFilter>(new KeepFilter());
}
virtual const char* Name() const override { return "KeepFilterFactory"; }
bool check_context_;
std::atomic_bool expect_full_compaction_;
std::atomic_bool expect_manual_compaction_;
};
class DelayFilter : public CompactionFilter {
public:
explicit DelayFilter(DBTestBase* d) : db_test(d) {}
virtual bool Filter(int level, const Slice& key, const Slice& value,
std::string* new_value,
bool* value_changed) const override {
db_test->env_->addon_time_.fetch_add(1000);
return true;
}
virtual const char* Name() const override { return "DelayFilter"; }
private:
DBTestBase* db_test;
};
class DelayFilterFactory : public CompactionFilterFactory {
public:
explicit DelayFilterFactory(DBTestBase* d) : db_test(d) {}
virtual std::unique_ptr<CompactionFilter> CreateCompactionFilter(
const CompactionFilter::Context& context) override {
return std::unique_ptr<CompactionFilter>(new DelayFilter(db_test));
}
virtual const char* Name() const override { return "DelayFilterFactory"; }
private:
DBTestBase* db_test;
};
} // namespace
#ifndef ROCKSDB_LITE
static std::string CompressibleString(Random* rnd, int len) {
std::string r;
test::CompressibleString(rnd, 0.8, len, &r);
return r;
}
#endif // ROCKSDB_LITE
TEST_F(DBTest, FailMoreDbPaths) {
Options options = CurrentOptions();
options.db_paths.emplace_back(dbname_, 10000000);
options.db_paths.emplace_back(dbname_ + "_2", 1000000);
options.db_paths.emplace_back(dbname_ + "_3", 1000000);
options.db_paths.emplace_back(dbname_ + "_4", 1000000);
options.db_paths.emplace_back(dbname_ + "_5", 1000000);
ASSERT_TRUE(TryReopen(options).IsNotSupported());
}
void CheckColumnFamilyMeta(const ColumnFamilyMetaData& cf_meta) {
uint64_t cf_size = 0;
uint64_t cf_csize = 0;
size_t file_count = 0;
for (auto level_meta : cf_meta.levels) {
uint64_t level_size = 0;
uint64_t level_csize = 0;
file_count += level_meta.files.size();
for (auto file_meta : level_meta.files) {
level_size += file_meta.size;
}
ASSERT_EQ(level_meta.size, level_size);
cf_size += level_size;
cf_csize += level_csize;
}
ASSERT_EQ(cf_meta.file_count, file_count);
ASSERT_EQ(cf_meta.size, cf_size);
}
#ifndef ROCKSDB_LITE
TEST_F(DBTest, ColumnFamilyMetaDataTest) {
Options options = CurrentOptions();
options.create_if_missing = true;
DestroyAndReopen(options);
Random rnd(301);
int key_index = 0;
ColumnFamilyMetaData cf_meta;
for (int i = 0; i < 100; ++i) {
GenerateNewFile(&rnd, &key_index);
db_->GetColumnFamilyMetaData(&cf_meta);
CheckColumnFamilyMeta(cf_meta);
}
}
namespace {
void MinLevelHelper(DBTest* self, Options& options) {
Random rnd(301);
for (int num = 0; num < options.level0_file_num_compaction_trigger - 1;
num++) {
std::vector<std::string> values;
// Write 120KB (12 values, each 10K)
for (int i = 0; i < 12; i++) {
values.push_back(DBTestBase::RandomString(&rnd, 10000));
ASSERT_OK(self->Put(DBTestBase::Key(i), values[i]));
}
self->dbfull()->TEST_WaitForFlushMemTable();
ASSERT_EQ(self->NumTableFilesAtLevel(0), num + 1);
}
// generate one more file in level-0, and should trigger level-0 compaction
std::vector<std::string> values;
for (int i = 0; i < 12; i++) {
values.push_back(DBTestBase::RandomString(&rnd, 10000));
ASSERT_OK(self->Put(DBTestBase::Key(i), values[i]));
}
self->dbfull()->TEST_WaitForCompact();
ASSERT_EQ(self->NumTableFilesAtLevel(0), 0);
ASSERT_EQ(self->NumTableFilesAtLevel(1), 1);
}
// returns false if the calling-Test should be skipped
bool MinLevelToCompress(CompressionType& type, Options& options, int wbits,
int lev, int strategy) {
fprintf(stderr,
"Test with compression options : window_bits = %d, level = %d, "
"strategy = %d}\n",
wbits, lev, strategy);
options.write_buffer_size = 100 << 10; // 100KB
options.arena_block_size = 4096;
options.num_levels = 3;
options.level0_file_num_compaction_trigger = 3;
options.create_if_missing = true;
if (Snappy_Supported()) {
type = kSnappyCompression;
fprintf(stderr, "using snappy\n");
} else if (Zlib_Supported()) {
type = kZlibCompression;
fprintf(stderr, "using zlib\n");
} else if (BZip2_Supported()) {
type = kBZip2Compression;
fprintf(stderr, "using bzip2\n");
} else if (LZ4_Supported()) {
type = kLZ4Compression;
fprintf(stderr, "using lz4\n");
} else if (XPRESS_Supported()) {
type = kXpressCompression;
fprintf(stderr, "using xpress\n");
} else if (ZSTD_Supported()) {
type = kZSTD;
fprintf(stderr, "using ZSTD\n");
} else {
fprintf(stderr, "skipping test, compression disabled\n");
return false;
}
options.compression_per_level.resize(options.num_levels);
// do not compress L0
for (int i = 0; i < 1; i++) {
options.compression_per_level[i] = kNoCompression;
}
for (int i = 1; i < options.num_levels; i++) {
options.compression_per_level[i] = type;
}
return true;
}
} // namespace
TEST_F(DBTest, MinLevelToCompress1) {
Options options = CurrentOptions();
CompressionType type = kSnappyCompression;
if (!MinLevelToCompress(type, options, -14, -1, 0)) {
return;
}
Reopen(options);
MinLevelHelper(this, options);
// do not compress L0 and L1
for (int i = 0; i < 2; i++) {
options.compression_per_level[i] = kNoCompression;
}
for (int i = 2; i < options.num_levels; i++) {
options.compression_per_level[i] = type;
}
DestroyAndReopen(options);
MinLevelHelper(this, options);
}
TEST_F(DBTest, MinLevelToCompress2) {
Options options = CurrentOptions();
CompressionType type = kSnappyCompression;
if (!MinLevelToCompress(type, options, 15, -1, 0)) {
return;
}
Reopen(options);
MinLevelHelper(this, options);
// do not compress L0 and L1
for (int i = 0; i < 2; i++) {
options.compression_per_level[i] = kNoCompression;
}
for (int i = 2; i < options.num_levels; i++) {
options.compression_per_level[i] = type;
}
DestroyAndReopen(options);
MinLevelHelper(this, options);
}
// This test may fail because of a legit case that multiple L0 files
// are trivial moved to L1.
TEST_F(DBTest, DISABLED_RepeatedWritesToSameKey) {
do {
Options options = CurrentOptions();
options.env = env_;
options.write_buffer_size = 100000; // Small write buffer
CreateAndReopenWithCF({"pikachu"}, options);
// We must have at most one file per level except for level-0,
// which may have up to kL0_StopWritesTrigger files.
const int kMaxFiles =
options.num_levels + options.level0_stop_writes_trigger;
Random rnd(301);
std::string value =
RandomString(&rnd, static_cast<int>(2 * options.write_buffer_size));
for (int i = 0; i < 5 * kMaxFiles; i++) {
ASSERT_OK(Put(1, "key", value));
ASSERT_LE(TotalTableFiles(1), kMaxFiles);
}
} while (ChangeCompactOptions());
}
#endif // ROCKSDB_LITE
TEST_F(DBTest, SparseMerge) {
do {
Options options = CurrentOptions();
options.compression = kNoCompression;
CreateAndReopenWithCF({"pikachu"}, options);
FillLevels("A", "Z", 1);
// Suppose there is:
// small amount of data with prefix A
// large amount of data with prefix B
// small amount of data with prefix C
// and that recent updates have made small changes to all three prefixes.
// Check that we do not do a compaction that merges all of B in one shot.
const std::string value(1000, 'x');
Put(1, "A", "va");
// Write approximately 100MB of "B" values
for (int i = 0; i < 100000; i++) {
char key[100];
snprintf(key, sizeof(key), "B%010d", i);
Put(1, key, value);
}
Put(1, "C", "vc");
ASSERT_OK(Flush(1));
dbfull()->TEST_CompactRange(0, nullptr, nullptr, handles_[1]);
// Make sparse update
Put(1, "A", "va2");
Put(1, "B100", "bvalue2");
Put(1, "C", "vc2");
ASSERT_OK(Flush(1));
// Compactions should not cause us to create a situation where
// a file overlaps too much data at the next level.
ASSERT_LE(dbfull()->TEST_MaxNextLevelOverlappingBytes(handles_[1]),
20 * 1048576);
dbfull()->TEST_CompactRange(0, nullptr, nullptr);
ASSERT_LE(dbfull()->TEST_MaxNextLevelOverlappingBytes(handles_[1]),
20 * 1048576);
dbfull()->TEST_CompactRange(1, nullptr, nullptr);
ASSERT_LE(dbfull()->TEST_MaxNextLevelOverlappingBytes(handles_[1]),
20 * 1048576);
} while (ChangeCompactOptions());
}
#ifndef ROCKSDB_LITE
static bool Between(uint64_t val, uint64_t low, uint64_t high) {
bool result = (val >= low) && (val <= high);
if (!result) {
fprintf(stderr, "Value %llu is not in range [%llu, %llu]\n",
(unsigned long long)(val), (unsigned long long)(low),
(unsigned long long)(high));
}
return result;
}
TEST_F(DBTest, ApproximateSizesMemTable) {
Options options = CurrentOptions();
options.write_buffer_size = 100000000; // Large write buffer
options.compression = kNoCompression;
options.create_if_missing = true;
DestroyAndReopen(options);
const int N = 128;
Random rnd(301);
for (int i = 0; i < N; i++) {
ASSERT_OK(Put(Key(i), RandomString(&rnd, 1024)));
}
uint64_t size;
std::string start = Key(50);
std::string end = Key(60);
Range r(start, end);
uint8_t include_both = DB::SizeApproximationFlags::INCLUDE_FILES |
DB::SizeApproximationFlags::INCLUDE_MEMTABLES;
db_->GetApproximateSizes(&r, 1, &size, include_both);
ASSERT_GT(size, 6000);
ASSERT_LT(size, 204800);
// Zero if not including mem table
db_->GetApproximateSizes(&r, 1, &size);
ASSERT_EQ(size, 0);
start = Key(500);
end = Key(600);
r = Range(start, end);
db_->GetApproximateSizes(&r, 1, &size, include_both);
ASSERT_EQ(size, 0);
for (int i = 0; i < N; i++) {
ASSERT_OK(Put(Key(1000 + i), RandomString(&rnd, 1024)));
}
start = Key(500);
end = Key(600);
r = Range(start, end);
db_->GetApproximateSizes(&r, 1, &size, include_both);
ASSERT_EQ(size, 0);
start = Key(100);
end = Key(1020);
r = Range(start, end);
db_->GetApproximateSizes(&r, 1, &size, include_both);
ASSERT_GT(size, 6000);
options.max_write_buffer_number = 8;
options.min_write_buffer_number_to_merge = 5;
options.write_buffer_size = 1024 * N; // Not very large
DestroyAndReopen(options);
int keys[N * 3];
for (int i = 0; i < N; i++) {
keys[i * 3] = i * 5;
keys[i * 3 + 1] = i * 5 + 1;
keys[i * 3 + 2] = i * 5 + 2;
}
std::random_shuffle(std::begin(keys), std::end(keys));
for (int i = 0; i < N * 3; i++) {
ASSERT_OK(Put(Key(keys[i] + 1000), RandomString(&rnd, 1024)));
}
start = Key(100);
end = Key(300);
r = Range(start, end);
db_->GetApproximateSizes(&r, 1, &size, include_both);
ASSERT_EQ(size, 0);
start = Key(1050);
end = Key(1080);
r = Range(start, end);
db_->GetApproximateSizes(&r, 1, &size, include_both);
ASSERT_GT(size, 6000);
start = Key(2100);
end = Key(2300);
r = Range(start, end);
db_->GetApproximateSizes(&r, 1, &size, include_both);
ASSERT_EQ(size, 0);
start = Key(1050);
end = Key(1080);
r = Range(start, end);
uint64_t size_with_mt, size_without_mt;
db_->GetApproximateSizes(&r, 1, &size_with_mt, include_both);
ASSERT_GT(size_with_mt, 6000);
db_->GetApproximateSizes(&r, 1, &size_without_mt);
ASSERT_EQ(size_without_mt, 0);
Flush();
for (int i = 0; i < N; i++) {
ASSERT_OK(Put(Key(i + 1000), RandomString(&rnd, 1024)));
}
start = Key(1050);
end = Key(1080);
r = Range(start, end);
db_->GetApproximateSizes(&r, 1, &size_with_mt, include_both);
db_->GetApproximateSizes(&r, 1, &size_without_mt);
ASSERT_GT(size_with_mt, size_without_mt);
ASSERT_GT(size_without_mt, 6000);
}
TEST_F(DBTest, GetApproximateMemTableStats) {
Options options = CurrentOptions();
options.write_buffer_size = 100000000;
options.compression = kNoCompression;
options.create_if_missing = true;
DestroyAndReopen(options);
const int N = 128;
Random rnd(301);
for (int i = 0; i < N; i++) {
ASSERT_OK(Put(Key(i), RandomString(&rnd, 1024)));
}
uint64_t count;
uint64_t size;
std::string start = Key(50);
std::string end = Key(60);
Range r(start, end);
db_->GetApproximateMemTableStats(r, &count, &size);
ASSERT_GT(count, 0);
ASSERT_LE(count, N);
ASSERT_GT(size, 6000);
ASSERT_LT(size, 204800);
start = Key(500);
end = Key(600);
r = Range(start, end);
db_->GetApproximateMemTableStats(r, &count, &size);
ASSERT_EQ(count, 0);
ASSERT_EQ(size, 0);
Flush();
start = Key(50);
end = Key(60);
r = Range(start, end);
db_->GetApproximateMemTableStats(r, &count, &size);
ASSERT_EQ(count, 0);
ASSERT_EQ(size, 0);
for (int i = 0; i < N; i++) {
ASSERT_OK(Put(Key(1000 + i), RandomString(&rnd, 1024)));
}
start = Key(100);
end = Key(1020);
r = Range(start, end);
db_->GetApproximateMemTableStats(r, &count, &size);
ASSERT_GT(count, 20);
ASSERT_GT(size, 6000);
}
TEST_F(DBTest, ApproximateSizes) {
do {
Options options = CurrentOptions();
options.write_buffer_size = 100000000; // Large write buffer
options.compression = kNoCompression;
options.create_if_missing = true;
DestroyAndReopen(options);
CreateAndReopenWithCF({"pikachu"}, options);
ASSERT_TRUE(Between(Size("", "xyz", 1), 0, 0));
ReopenWithColumnFamilies({"default", "pikachu"}, options);
ASSERT_TRUE(Between(Size("", "xyz", 1), 0, 0));
// Write 8MB (80 values, each 100K)
ASSERT_EQ(NumTableFilesAtLevel(0, 1), 0);
const int N = 80;
static const int S1 = 100000;
static const int S2 = 105000; // Allow some expansion from metadata
Random rnd(301);
for (int i = 0; i < N; i++) {
ASSERT_OK(Put(1, Key(i), RandomString(&rnd, S1)));
}
// 0 because GetApproximateSizes() does not account for memtable space
ASSERT_TRUE(Between(Size("", Key(50), 1), 0, 0));
// Check sizes across recovery by reopening a few times
for (int run = 0; run < 3; run++) {
ReopenWithColumnFamilies({"default", "pikachu"}, options);
for (int compact_start = 0; compact_start < N; compact_start += 10) {
for (int i = 0; i < N; i += 10) {
ASSERT_TRUE(Between(Size("", Key(i), 1), S1 * i, S2 * i));
ASSERT_TRUE(Between(Size("", Key(i) + ".suffix", 1), S1 * (i + 1),
S2 * (i + 1)));
ASSERT_TRUE(Between(Size(Key(i), Key(i + 10), 1), S1 * 10, S2 * 10));
}
ASSERT_TRUE(Between(Size("", Key(50), 1), S1 * 50, S2 * 50));
ASSERT_TRUE(
Between(Size("", Key(50) + ".suffix", 1), S1 * 50, S2 * 50));
std::string cstart_str = Key(compact_start);
std::string cend_str = Key(compact_start + 9);
Slice cstart = cstart_str;
Slice cend = cend_str;
dbfull()->TEST_CompactRange(0, &cstart, &cend, handles_[1]);
}
ASSERT_EQ(NumTableFilesAtLevel(0, 1), 0);
ASSERT_GT(NumTableFilesAtLevel(1, 1), 0);
}
// ApproximateOffsetOf() is not yet implemented in plain table format.
} while (ChangeOptions(kSkipUniversalCompaction | kSkipFIFOCompaction |
kSkipPlainTable | kSkipHashIndex));
}
TEST_F(DBTest, ApproximateSizes_MixOfSmallAndLarge) {
do {
Options options = CurrentOptions();
options.compression = kNoCompression;
CreateAndReopenWithCF({"pikachu"}, options);
Random rnd(301);
std::string big1 = RandomString(&rnd, 100000);
ASSERT_OK(Put(1, Key(0), RandomString(&rnd, 10000)));
ASSERT_OK(Put(1, Key(1), RandomString(&rnd, 10000)));
ASSERT_OK(Put(1, Key(2), big1));
ASSERT_OK(Put(1, Key(3), RandomString(&rnd, 10000)));
ASSERT_OK(Put(1, Key(4), big1));
ASSERT_OK(Put(1, Key(5), RandomString(&rnd, 10000)));
ASSERT_OK(Put(1, Key(6), RandomString(&rnd, 300000)));
ASSERT_OK(Put(1, Key(7), RandomString(&rnd, 10000)));
// Check sizes across recovery by reopening a few times
for (int run = 0; run < 3; run++) {
ReopenWithColumnFamilies({"default", "pikachu"}, options);
ASSERT_TRUE(Between(Size("", Key(0), 1), 0, 0));
ASSERT_TRUE(Between(Size("", Key(1), 1), 10000, 11000));
ASSERT_TRUE(Between(Size("", Key(2), 1), 20000, 21000));
ASSERT_TRUE(Between(Size("", Key(3), 1), 120000, 121000));
ASSERT_TRUE(Between(Size("", Key(4), 1), 130000, 131000));
ASSERT_TRUE(Between(Size("", Key(5), 1), 230000, 231000));
ASSERT_TRUE(Between(Size("", Key(6), 1), 240000, 241000));
ASSERT_TRUE(Between(Size("", Key(7), 1), 540000, 541000));
ASSERT_TRUE(Between(Size("", Key(8), 1), 550000, 560000));
ASSERT_TRUE(Between(Size(Key(3), Key(5), 1), 110000, 111000));
dbfull()->TEST_CompactRange(0, nullptr, nullptr, handles_[1]);
}
// ApproximateOffsetOf() is not yet implemented in plain table format.
} while (ChangeOptions(kSkipPlainTable));
}
#endif // ROCKSDB_LITE
#ifndef ROCKSDB_LITE
TEST_F(DBTest, Snapshot) {
anon::OptionsOverride options_override;
options_override.skip_policy = kSkipNoSnapshot;
do {
CreateAndReopenWithCF({"pikachu"}, CurrentOptions(options_override));
Put(0, "foo", "0v1");
Put(1, "foo", "1v1");
const Snapshot* s1 = db_->GetSnapshot();
ASSERT_EQ(1U, GetNumSnapshots());
uint64_t time_snap1 = GetTimeOldestSnapshots();
ASSERT_GT(time_snap1, 0U);
Put(0, "foo", "0v2");
Put(1, "foo", "1v2");
env_->addon_time_.fetch_add(1);
const Snapshot* s2 = db_->GetSnapshot();
ASSERT_EQ(2U, GetNumSnapshots());
ASSERT_EQ(time_snap1, GetTimeOldestSnapshots());
Put(0, "foo", "0v3");
Put(1, "foo", "1v3");
{
ManagedSnapshot s3(db_);
ASSERT_EQ(3U, GetNumSnapshots());
ASSERT_EQ(time_snap1, GetTimeOldestSnapshots());
Put(0, "foo", "0v4");
Put(1, "foo", "1v4");
ASSERT_EQ("0v1", Get(0, "foo", s1));
ASSERT_EQ("1v1", Get(1, "foo", s1));
ASSERT_EQ("0v2", Get(0, "foo", s2));
ASSERT_EQ("1v2", Get(1, "foo", s2));
ASSERT_EQ("0v3", Get(0, "foo", s3.snapshot()));
ASSERT_EQ("1v3", Get(1, "foo", s3.snapshot()));
ASSERT_EQ("0v4", Get(0, "foo"));
ASSERT_EQ("1v4", Get(1, "foo"));
}
ASSERT_EQ(2U, GetNumSnapshots());
ASSERT_EQ(time_snap1, GetTimeOldestSnapshots());
ASSERT_EQ("0v1", Get(0, "foo", s1));
ASSERT_EQ("1v1", Get(1, "foo", s1));
ASSERT_EQ("0v2", Get(0, "foo", s2));
ASSERT_EQ("1v2", Get(1, "foo", s2));
ASSERT_EQ("0v4", Get(0, "foo"));
ASSERT_EQ("1v4", Get(1, "foo"));
db_->ReleaseSnapshot(s1);
ASSERT_EQ("0v2", Get(0, "foo", s2));
ASSERT_EQ("1v2", Get(1, "foo", s2));
ASSERT_EQ("0v4", Get(0, "foo"));
ASSERT_EQ("1v4", Get(1, "foo"));
ASSERT_EQ(1U, GetNumSnapshots());
ASSERT_LT(time_snap1, GetTimeOldestSnapshots());
db_->ReleaseSnapshot(s2);
ASSERT_EQ(0U, GetNumSnapshots());
ASSERT_EQ("0v4", Get(0, "foo"));
ASSERT_EQ("1v4", Get(1, "foo"));
} while (ChangeOptions(kSkipHashCuckoo));
}
TEST_F(DBTest, HiddenValuesAreRemoved) {
anon::OptionsOverride options_override;
options_override.skip_policy = kSkipNoSnapshot;
do {
Options options = CurrentOptions(options_override);
CreateAndReopenWithCF({"pikachu"}, options);
Random rnd(301);
FillLevels("a", "z", 1);
std::string big = RandomString(&rnd, 50000);
Put(1, "foo", big);
Put(1, "pastfoo", "v");
const Snapshot* snapshot = db_->GetSnapshot();
Put(1, "foo", "tiny");
Put(1, "pastfoo2", "v2"); // Advance sequence number one more
ASSERT_OK(Flush(1));
ASSERT_GT(NumTableFilesAtLevel(0, 1), 0);
ASSERT_EQ(big, Get(1, "foo", snapshot));
ASSERT_TRUE(Between(Size("", "pastfoo", 1), 50000, 60000));
db_->ReleaseSnapshot(snapshot);
ASSERT_EQ(AllEntriesFor("foo", 1), "[ tiny, " + big + " ]");
Slice x("x");
dbfull()->TEST_CompactRange(0, nullptr, &x, handles_[1]);
ASSERT_EQ(AllEntriesFor("foo", 1), "[ tiny ]");
ASSERT_EQ(NumTableFilesAtLevel(0, 1), 0);
ASSERT_GE(NumTableFilesAtLevel(1, 1), 1);
dbfull()->TEST_CompactRange(1, nullptr, &x, handles_[1]);
ASSERT_EQ(AllEntriesFor("foo", 1), "[ tiny ]");
ASSERT_TRUE(Between(Size("", "pastfoo", 1), 0, 1000));
// ApproximateOffsetOf() is not yet implemented in plain table format,
// which is used by Size().
// skip HashCuckooRep as it does not support snapshot
} while (ChangeOptions(kSkipUniversalCompaction | kSkipFIFOCompaction |
kSkipPlainTable | kSkipHashCuckoo));
}
#endif // ROCKSDB_LITE
TEST_F(DBTest, UnremovableSingleDelete) {
// If we compact:
//
// Put(A, v1) Snapshot SingleDelete(A) Put(A, v2)
//
// We do not want to end up with:
//
// Put(A, v1) Snapshot Put(A, v2)
//
// Because a subsequent SingleDelete(A) would delete the Put(A, v2)
// but not Put(A, v1), so Get(A) would return v1.
anon::OptionsOverride options_override;
options_override.skip_policy = kSkipNoSnapshot;
do {
Options options = CurrentOptions(options_override);
options.disable_auto_compactions = true;
CreateAndReopenWithCF({"pikachu"}, options);
Put(1, "foo", "first");
const Snapshot* snapshot = db_->GetSnapshot();
SingleDelete(1, "foo");
Put(1, "foo", "second");
ASSERT_OK(Flush(1));
ASSERT_EQ("first", Get(1, "foo", snapshot));
ASSERT_EQ("second", Get(1, "foo"));
dbfull()->CompactRange(CompactRangeOptions(), handles_[1], nullptr,
nullptr);
ASSERT_EQ("[ second, SDEL, first ]", AllEntriesFor("foo", 1));
SingleDelete(1, "foo");
ASSERT_EQ("first", Get(1, "foo", snapshot));
ASSERT_EQ("NOT_FOUND", Get(1, "foo"));
dbfull()->CompactRange(CompactRangeOptions(), handles_[1], nullptr,
nullptr);
ASSERT_EQ("first", Get(1, "foo", snapshot));
ASSERT_EQ("NOT_FOUND", Get(1, "foo"));
db_->ReleaseSnapshot(snapshot);
// Skip HashCuckooRep as it does not support single delete. FIFO and
// universal compaction do not apply to the test case. Skip MergePut
// because single delete does not get removed when it encounters a merge.
} while (ChangeOptions(kSkipHashCuckoo | kSkipFIFOCompaction |
kSkipUniversalCompaction | kSkipMergePut));
}
#ifndef ROCKSDB_LITE
TEST_F(DBTest, DeletionMarkers1) {
Options options = CurrentOptions();
CreateAndReopenWithCF({"pikachu"}, options);
Put(1, "foo", "v1");
ASSERT_OK(Flush(1));
const int last = 2;
MoveFilesToLevel(last, 1);
// foo => v1 is now in last level
ASSERT_EQ(NumTableFilesAtLevel(last, 1), 1);
// Place a table at level last-1 to prevent merging with preceding mutation
Put(1, "a", "begin");
Put(1, "z", "end");
Flush(1);
MoveFilesToLevel(last - 1, 1);
ASSERT_EQ(NumTableFilesAtLevel(last, 1), 1);
ASSERT_EQ(NumTableFilesAtLevel(last - 1, 1), 1);
Delete(1, "foo");
Put(1, "foo", "v2");
ASSERT_EQ(AllEntriesFor("foo", 1), "[ v2, DEL, v1 ]");
ASSERT_OK(Flush(1)); // Moves to level last-2
ASSERT_EQ(AllEntriesFor("foo", 1), "[ v2, v1 ]");
Slice z("z");
dbfull()->TEST_CompactRange(last - 2, nullptr, &z, handles_[1]);
// DEL eliminated, but v1 remains because we aren't compacting that level
// (DEL can be eliminated because v2 hides v1).
ASSERT_EQ(AllEntriesFor("foo", 1), "[ v2, v1 ]");
dbfull()->TEST_CompactRange(last - 1, nullptr, nullptr, handles_[1]);
// Merging last-1 w/ last, so we are the base level for "foo", so
// DEL is removed. (as is v1).
ASSERT_EQ(AllEntriesFor("foo", 1), "[ v2 ]");
}
TEST_F(DBTest, DeletionMarkers2) {
Options options = CurrentOptions();
CreateAndReopenWithCF({"pikachu"}, options);
Put(1, "foo", "v1");
ASSERT_OK(Flush(1));
const int last = 2;
MoveFilesToLevel(last, 1);
// foo => v1 is now in last level
ASSERT_EQ(NumTableFilesAtLevel(last, 1), 1);
// Place a table at level last-1 to prevent merging with preceding mutation
Put(1, "a", "begin");
Put(1, "z", "end");
Flush(1);
MoveFilesToLevel(last - 1, 1);
ASSERT_EQ(NumTableFilesAtLevel(last, 1), 1);
ASSERT_EQ(NumTableFilesAtLevel(last - 1, 1), 1);
Delete(1, "foo");
ASSERT_EQ(AllEntriesFor("foo", 1), "[ DEL, v1 ]");
ASSERT_OK(Flush(1)); // Moves to level last-2
ASSERT_EQ(AllEntriesFor("foo", 1), "[ DEL, v1 ]");
dbfull()->TEST_CompactRange(last - 2, nullptr, nullptr, handles_[1]);
// DEL kept: "last" file overlaps
ASSERT_EQ(AllEntriesFor("foo", 1), "[ DEL, v1 ]");
dbfull()->TEST_CompactRange(last - 1, nullptr, nullptr, handles_[1]);
// Merging last-1 w/ last, so we are the base level for "foo", so
// DEL is removed. (as is v1).
ASSERT_EQ(AllEntriesFor("foo", 1), "[ ]");
}
TEST_F(DBTest, OverlapInLevel0) {
do {
Options options = CurrentOptions();
CreateAndReopenWithCF({"pikachu"}, options);
// Fill levels 1 and 2 to disable the pushing of new memtables to levels >
// 0.
ASSERT_OK(Put(1, "100", "v100"));
ASSERT_OK(Put(1, "999", "v999"));
Flush(1);
MoveFilesToLevel(2, 1);
ASSERT_OK(Delete(1, "100"));
ASSERT_OK(Delete(1, "999"));
Flush(1);
MoveFilesToLevel(1, 1);
ASSERT_EQ("0,1,1", FilesPerLevel(1));
// Make files spanning the following ranges in level-0:
// files[0] 200 .. 900
// files[1] 300 .. 500
// Note that files are sorted by smallest key.
ASSERT_OK(Put(1, "300", "v300"));
ASSERT_OK(Put(1, "500", "v500"));
Flush(1);
ASSERT_OK(Put(1, "200", "v200"));
ASSERT_OK(Put(1, "600", "v600"));
ASSERT_OK(Put(1, "900", "v900"));
Flush(1);
ASSERT_EQ("2,1,1", FilesPerLevel(1));
// Compact away the placeholder files we created initially
dbfull()->TEST_CompactRange(1, nullptr, nullptr, handles_[1]);
dbfull()->TEST_CompactRange(2, nullptr, nullptr, handles_[1]);
ASSERT_EQ("2", FilesPerLevel(1));
// Do a memtable compaction. Before bug-fix, the compaction would
// not detect the overlap with level-0 files and would incorrectly place
// the deletion in a deeper level.
ASSERT_OK(Delete(1, "600"));
Flush(1);
ASSERT_EQ("3", FilesPerLevel(1));
ASSERT_EQ("NOT_FOUND", Get(1, "600"));
} while (ChangeOptions(kSkipUniversalCompaction | kSkipFIFOCompaction));
}
#endif // ROCKSDB_LITE
TEST_F(DBTest, ComparatorCheck) {
class NewComparator : public Comparator {
public:
virtual const char* Name() const override {
return "rocksdb.NewComparator";
}
virtual int Compare(const Slice& a, const Slice& b) const override {
return BytewiseComparator()->Compare(a, b);
}
virtual void FindShortestSeparator(std::string* s,
const Slice& l) const override {
BytewiseComparator()->FindShortestSeparator(s, l);
}
virtual void FindShortSuccessor(std::string* key) const override {
BytewiseComparator()->FindShortSuccessor(key);
}
};
Options new_options, options;
NewComparator cmp;
do {
options = CurrentOptions();
CreateAndReopenWithCF({"pikachu"}, options);
new_options = CurrentOptions();
new_options.comparator = &cmp;
// only the non-default column family has non-matching comparator
Status s = TryReopenWithColumnFamilies(
{"default", "pikachu"}, std::vector<Options>({options, new_options}));
ASSERT_TRUE(!s.ok());
ASSERT_TRUE(s.ToString().find("comparator") != std::string::npos)
<< s.ToString();
} while (ChangeCompactOptions());
}
TEST_F(DBTest, CustomComparator) {
class NumberComparator : public Comparator {
public:
virtual const char* Name() const override {
return "test.NumberComparator";
}
virtual int Compare(const Slice& a, const Slice& b) const override {
return ToNumber(a) - ToNumber(b);
}
virtual void FindShortestSeparator(std::string* s,
const Slice& l) const override {
ToNumber(*s); // Check format
ToNumber(l); // Check format
}
virtual void FindShortSuccessor(std::string* key) const override {
ToNumber(*key); // Check format
}
private:
static int ToNumber(const Slice& x) {
// Check that there are no extra characters.
EXPECT_TRUE(x.size() >= 2 && x[0] == '[' && x[x.size() - 1] == ']')
<< EscapeString(x);
int val;
char ignored;
EXPECT_TRUE(sscanf(x.ToString().c_str(), "[%i]%c", &val, &ignored) == 1)
<< EscapeString(x);
return val;
}
};
Options new_options;
NumberComparator cmp;
do {
new_options = CurrentOptions();
new_options.create_if_missing = true;
new_options.comparator = &cmp;
new_options.write_buffer_size = 4096; // Compact more often
new_options.arena_block_size = 4096;
new_options = CurrentOptions(new_options);
DestroyAndReopen(new_options);
CreateAndReopenWithCF({"pikachu"}, new_options);
ASSERT_OK(Put(1, "[10]", "ten"));
ASSERT_OK(Put(1, "[0x14]", "twenty"));
for (int i = 0; i < 2; i++) {
ASSERT_EQ("ten", Get(1, "[10]"));
ASSERT_EQ("ten", Get(1, "[0xa]"));
ASSERT_EQ("twenty", Get(1, "[20]"));
ASSERT_EQ("twenty", Get(1, "[0x14]"));
ASSERT_EQ("NOT_FOUND", Get(1, "[15]"));
ASSERT_EQ("NOT_FOUND", Get(1, "[0xf]"));
Compact(1, "[0]", "[9999]");
}
for (int run = 0; run < 2; run++) {
for (int i = 0; i < 1000; i++) {
char buf[100];
snprintf(buf, sizeof(buf), "[%d]", i * 10);
ASSERT_OK(Put(1, buf, buf));
}
Compact(1, "[0]", "[1000000]");
}
} while (ChangeCompactOptions());
}
TEST_F(DBTest, DBOpen_Options) {
Options options = CurrentOptions();
std::string dbname = test::TmpDir(env_) + "/db_options_test";
ASSERT_OK(DestroyDB(dbname, options));
// Does not exist, and create_if_missing == false: error
DB* db = nullptr;
options.create_if_missing = false;
Status s = DB::Open(options, dbname, &db);
ASSERT_TRUE(strstr(s.ToString().c_str(), "does not exist") != nullptr);
ASSERT_TRUE(db == nullptr);
// Does not exist, and create_if_missing == true: OK
options.create_if_missing = true;
s = DB::Open(options, dbname, &db);
ASSERT_OK(s);
ASSERT_TRUE(db != nullptr);
delete db;
db = nullptr;
// Does exist, and error_if_exists == true: error
options.create_if_missing = false;
options.error_if_exists = true;
s = DB::Open(options, dbname, &db);
ASSERT_TRUE(strstr(s.ToString().c_str(), "exists") != nullptr);
ASSERT_TRUE(db == nullptr);
// Does exist, and error_if_exists == false: OK
options.create_if_missing = true;
options.error_if_exists = false;
s = DB::Open(options, dbname, &db);
ASSERT_OK(s);
ASSERT_TRUE(db != nullptr);
delete db;
db = nullptr;
}
TEST_F(DBTest, DBOpen_Change_NumLevels) {
Options options = CurrentOptions();
options.create_if_missing = true;
DestroyAndReopen(options);
ASSERT_TRUE(db_ != nullptr);
CreateAndReopenWithCF({"pikachu"}, options);
ASSERT_OK(Put(1, "a", "123"));
ASSERT_OK(Put(1, "b", "234"));
Flush(1);
MoveFilesToLevel(3, 1);
Close();
options.create_if_missing = false;
options.num_levels = 2;
Status s = TryReopenWithColumnFamilies({"default", "pikachu"}, options);
ASSERT_TRUE(strstr(s.ToString().c_str(), "Invalid argument") != nullptr);
ASSERT_TRUE(db_ == nullptr);
}
TEST_F(DBTest, DestroyDBMetaDatabase) {
std::string dbname = test::TmpDir(env_) + "/db_meta";
ASSERT_OK(env_->CreateDirIfMissing(dbname));
std::string metadbname = MetaDatabaseName(dbname, 0);
ASSERT_OK(env_->CreateDirIfMissing(metadbname));
std::string metametadbname = MetaDatabaseName(metadbname, 0);
ASSERT_OK(env_->CreateDirIfMissing(metametadbname));
// Destroy previous versions if they exist. Using the long way.
Options options = CurrentOptions();
ASSERT_OK(DestroyDB(metametadbname, options));
ASSERT_OK(DestroyDB(metadbname, options));
ASSERT_OK(DestroyDB(dbname, options));
// Setup databases
DB* db = nullptr;
ASSERT_OK(DB::Open(options, dbname, &db));
delete db;
db = nullptr;
ASSERT_OK(DB::Open(options, metadbname, &db));
delete db;
db = nullptr;
ASSERT_OK(DB::Open(options, metametadbname, &db));
delete db;
db = nullptr;
// Delete databases
ASSERT_OK(DestroyDB(dbname, options));
// Check if deletion worked.
options.create_if_missing = false;
ASSERT_TRUE(!(DB::Open(options, dbname, &db)).ok());
ASSERT_TRUE(!(DB::Open(options, metadbname, &db)).ok());
ASSERT_TRUE(!(DB::Open(options, metametadbname, &db)).ok());
}
#ifndef ROCKSDB_LITE
TEST_F(DBTest, SnapshotFiles) {
do {
Options options = CurrentOptions();
options.write_buffer_size = 100000000; // Large write buffer
CreateAndReopenWithCF({"pikachu"}, options);
Random rnd(301);
// Write 8MB (80 values, each 100K)
ASSERT_EQ(NumTableFilesAtLevel(0, 1), 0);
std::vector<std::string> values;
for (int i = 0; i < 80; i++) {
values.push_back(RandomString(&rnd, 100000));
ASSERT_OK(Put((i < 40), Key(i), values[i]));
}
// assert that nothing makes it to disk yet.
ASSERT_EQ(NumTableFilesAtLevel(0, 1), 0);
// get a file snapshot
uint64_t manifest_number = 0;
uint64_t manifest_size = 0;
std::vector<std::string> files;
dbfull()->DisableFileDeletions();
dbfull()->GetLiveFiles(files, &manifest_size);
// CURRENT, MANIFEST, OPTIONS, *.sst files (one for each CF)
ASSERT_EQ(files.size(), 5U);
uint64_t number = 0;
FileType type;
// copy these files to a new snapshot directory
std::string snapdir = dbname_ + ".snapdir/";
ASSERT_OK(env_->CreateDirIfMissing(snapdir));
for (size_t i = 0; i < files.size(); i++) {
// our clients require that GetLiveFiles returns
// files with "/" as first character!
ASSERT_EQ(files[i][0], '/');
std::string src = dbname_ + files[i];
std::string dest = snapdir + files[i];
uint64_t size;
ASSERT_OK(env_->GetFileSize(src, &size));
// record the number and the size of the
// latest manifest file
if (ParseFileName(files[i].substr(1), &number, &type)) {
if (type == kDescriptorFile) {
if (number > manifest_number) {
manifest_number = number;
ASSERT_GE(size, manifest_size);
size = manifest_size; // copy only valid MANIFEST data
}
}
}
CopyFile(src, dest, size);
}
// release file snapshot
dbfull()->DisableFileDeletions();
// overwrite one key, this key should not appear in the snapshot
std::vector<std::string> extras;
for (unsigned int i = 0; i < 1; i++) {
extras.push_back(RandomString(&rnd, 100000));
ASSERT_OK(Put(0, Key(i), extras[i]));
}
// verify that data in the snapshot are correct
std::vector<ColumnFamilyDescriptor> column_families;
column_families.emplace_back("default", ColumnFamilyOptions());
column_families.emplace_back("pikachu", ColumnFamilyOptions());
std::vector<ColumnFamilyHandle*> cf_handles;
DB* snapdb;
DBOptions opts;
opts.env = env_;
opts.create_if_missing = false;
Status stat =
DB::Open(opts, snapdir, column_families, &cf_handles, &snapdb);
ASSERT_OK(stat);
ReadOptions roptions;
std::string val;
for (unsigned int i = 0; i < 80; i++) {
stat = snapdb->Get(roptions, cf_handles[i < 40], Key(i), &val);
ASSERT_EQ(values[i].compare(val), 0);
}
for (auto cfh : cf_handles) {
delete cfh;
}
delete snapdb;
// look at the new live files after we added an 'extra' key
// and after we took the first snapshot.
uint64_t new_manifest_number = 0;
uint64_t new_manifest_size = 0;
std::vector<std::string> newfiles;
dbfull()->DisableFileDeletions();
dbfull()->GetLiveFiles(newfiles, &new_manifest_size);
// find the new manifest file. assert that this manifest file is
// the same one as in the previous snapshot. But its size should be
// larger because we added an extra key after taking the
// previous shapshot.
for (size_t i = 0; i < newfiles.size(); i++) {
std::string src = dbname_ + "/" + newfiles[i];
// record the lognumber and the size of the
// latest manifest file
if (ParseFileName(newfiles[i].substr(1), &number, &type)) {
if (type == kDescriptorFile) {
if (number > new_manifest_number) {
uint64_t size;
new_manifest_number = number;
ASSERT_OK(env_->GetFileSize(src, &size));
ASSERT_GE(size, new_manifest_size);
}
}
}
}
ASSERT_EQ(manifest_number, new_manifest_number);
ASSERT_GT(new_manifest_size, manifest_size);
// release file snapshot
dbfull()->DisableFileDeletions();
} while (ChangeCompactOptions());
}
#endif
TEST_F(DBTest, PurgeInfoLogs) {
Options options = CurrentOptions();
options.keep_log_file_num = 5;
options.create_if_missing = true;
for (int mode = 0; mode <= 1; mode++) {
if (mode == 1) {
options.db_log_dir = dbname_ + "_logs";
env_->CreateDirIfMissing(options.db_log_dir);
} else {
options.db_log_dir = "";
}
for (int i = 0; i < 8; i++) {
Reopen(options);
}
std::vector<std::string> files;
env_->GetChildren(options.db_log_dir.empty() ? dbname_ : options.db_log_dir,
&files);
int info_log_count = 0;
for (std::string file : files) {
if (file.find("LOG") != std::string::npos) {
info_log_count++;
}
}
ASSERT_EQ(5, info_log_count);
Destroy(options);
// For mode (1), test DestroyDB() to delete all the logs under DB dir.
// For mode (2), no info log file should have been put under DB dir.
std::vector<std::string> db_files;
env_->GetChildren(dbname_, &db_files);
for (std::string file : db_files) {
ASSERT_TRUE(file.find("LOG") == std::string::npos);
}
if (mode == 1) {
// Cleaning up
env_->GetChildren(options.db_log_dir, &files);
for (std::string file : files) {
env_->DeleteFile(options.db_log_dir + "/" + file);
}
env_->DeleteDir(options.db_log_dir);
}
}
}
#ifndef ROCKSDB_LITE
// Multi-threaded test:
namespace {
static const int kColumnFamilies = 10;
static const int kNumThreads = 10;
static const int kTestSeconds = 10;
static const int kNumKeys = 1000;
struct MTState {
DBTest* test;
std::atomic<bool> stop;
std::atomic<int> counter[kNumThreads];
std::atomic<bool> thread_done[kNumThreads];
};
struct MTThread {
MTState* state;
int id;
};
static void MTThreadBody(void* arg) {
MTThread* t = reinterpret_cast<MTThread*>(arg);
int id = t->id;
DB* db = t->state->test->db_;
int counter = 0;
fprintf(stderr, "... starting thread %d\n", id);
Random rnd(1000 + id);
char valbuf[1500];
while (t->state->stop.load(std::memory_order_acquire) == false) {
t->state->counter[id].store(counter, std::memory_order_release);
int key = rnd.Uniform(kNumKeys);
char keybuf[20];
snprintf(keybuf, sizeof(keybuf), "%016d", key);
if (rnd.OneIn(2)) {
// Write values of the form <key, my id, counter, cf, unique_id>.
// into each of the CFs
// We add some padding for force compactions.
int unique_id = rnd.Uniform(1000000);
// Half of the time directly use WriteBatch. Half of the time use
// WriteBatchWithIndex.
if (rnd.OneIn(2)) {
WriteBatch batch;
for (int cf = 0; cf < kColumnFamilies; ++cf) {
snprintf(valbuf, sizeof(valbuf), "%d.%d.%d.%d.%-1000d", key, id,
static_cast<int>(counter), cf, unique_id);
batch.Put(t->state->test->handles_[cf], Slice(keybuf), Slice(valbuf));
}
ASSERT_OK(db->Write(WriteOptions(), &batch));
} else {
WriteBatchWithIndex batch(db->GetOptions().comparator);
for (int cf = 0; cf < kColumnFamilies; ++cf) {
snprintf(valbuf, sizeof(valbuf), "%d.%d.%d.%d.%-1000d", key, id,
static_cast<int>(counter), cf, unique_id);
batch.Put(t->state->test->handles_[cf], Slice(keybuf), Slice(valbuf));
}
ASSERT_OK(db->Write(WriteOptions(), batch.GetWriteBatch()));
}
} else {
// Read a value and verify that it matches the pattern written above
// and that writes to all column families were atomic (unique_id is the
// same)
std::vector<Slice> keys(kColumnFamilies, Slice(keybuf));
std::vector<std::string> values;
std::vector<Status> statuses =
db->MultiGet(ReadOptions(), t->state->test->handles_, keys, &values);
Status s = statuses[0];
// all statuses have to be the same
for (size_t i = 1; i < statuses.size(); ++i) {
// they are either both ok or both not-found
ASSERT_TRUE((s.ok() && statuses[i].ok()) ||
(s.IsNotFound() && statuses[i].IsNotFound()));
}
if (s.IsNotFound()) {
// Key has not yet been written
} else {
// Check that the writer thread counter is >= the counter in the value
ASSERT_OK(s);
int unique_id = -1;
for (int i = 0; i < kColumnFamilies; ++i) {
int k, w, c, cf, u;
ASSERT_EQ(5, sscanf(values[i].c_str(), "%d.%d.%d.%d.%d", &k, &w, &c,
&cf, &u))
<< values[i];
ASSERT_EQ(k, key);
ASSERT_GE(w, 0);
ASSERT_LT(w, kNumThreads);
ASSERT_LE(c, t->state->counter[w].load(std::memory_order_acquire));
ASSERT_EQ(cf, i);
if (i == 0) {
unique_id = u;
} else {
// this checks that updates across column families happened
// atomically -- all unique ids are the same
ASSERT_EQ(u, unique_id);
}
}
}
}
counter++;
}
t->state->thread_done[id].store(true, std::memory_order_release);
fprintf(stderr, "... stopping thread %d after %d ops\n", id, int(counter));
}
} // namespace
class MultiThreadedDBTest : public DBTest,
public ::testing::WithParamInterface<int> {
public:
virtual void SetUp() override { option_config_ = GetParam(); }
static std::vector<int> GenerateOptionConfigs() {
std::vector<int> optionConfigs;
for (int optionConfig = kDefault; optionConfig < kEnd; ++optionConfig) {
// skip as HashCuckooRep does not support snapshot
if (optionConfig != kHashCuckoo) {
optionConfigs.push_back(optionConfig);
}
}
return optionConfigs;
}
};
TEST_P(MultiThreadedDBTest, MultiThreaded) {
anon::OptionsOverride options_override;
options_override.skip_policy = kSkipNoSnapshot;
Options options = CurrentOptions(options_override);
std::vector<std::string> cfs;
for (int i = 1; i < kColumnFamilies; ++i) {
cfs.push_back(ToString(i));
}
Reopen(options);
CreateAndReopenWithCF(cfs, options);
// Initialize state
MTState mt;
mt.test = this;
mt.stop.store(false, std::memory_order_release);
for (int id = 0; id < kNumThreads; id++) {
mt.counter[id].store(0, std::memory_order_release);
mt.thread_done[id].store(false, std::memory_order_release);
}
// Start threads
MTThread thread[kNumThreads];
for (int id = 0; id < kNumThreads; id++) {
thread[id].state = &mt;
thread[id].id = id;
env_->StartThread(MTThreadBody, &thread[id]);
}
// Let them run for a while
env_->SleepForMicroseconds(kTestSeconds * 1000000);
// Stop the threads and wait for them to finish
mt.stop.store(true, std::memory_order_release);
for (int id = 0; id < kNumThreads; id++) {
while (mt.thread_done[id].load(std::memory_order_acquire) == false) {
env_->SleepForMicroseconds(100000);
}
}
}
INSTANTIATE_TEST_CASE_P(
MultiThreaded, MultiThreadedDBTest,
::testing::ValuesIn(MultiThreadedDBTest::GenerateOptionConfigs()));
#endif // ROCKSDB_LITE
// Group commit test:
namespace {
static const int kGCNumThreads = 4;
static const int kGCNumKeys = 1000;
struct GCThread {
DB* db;
int id;
std::atomic<bool> done;
};
static void GCThreadBody(void* arg) {
GCThread* t = reinterpret_cast<GCThread*>(arg);
int id = t->id;
DB* db = t->db;
WriteOptions wo;
for (int i = 0; i < kGCNumKeys; ++i) {
std::string kv(ToString(i + id * kGCNumKeys));
ASSERT_OK(db->Put(wo, kv, kv));
}
t->done = true;
}
} // namespace
TEST_F(DBTest, GroupCommitTest) {
do {
Options options = CurrentOptions();
options.env = env_;
env_->log_write_slowdown_.store(100);
options.statistics = rocksdb::CreateDBStatistics();
Reopen(options);
// Start threads
GCThread thread[kGCNumThreads];
for (int id = 0; id < kGCNumThreads; id++) {
thread[id].id = id;
thread[id].db = db_;
thread[id].done = false;
env_->StartThread(GCThreadBody, &thread[id]);
}
for (int id = 0; id < kGCNumThreads; id++) {
while (thread[id].done == false) {
env_->SleepForMicroseconds(100000);
}
}
env_->log_write_slowdown_.store(0);
ASSERT_GT(TestGetTickerCount(options, WRITE_DONE_BY_OTHER), 0);
std::vector<std::string> expected_db;
for (int i = 0; i < kGCNumThreads * kGCNumKeys; ++i) {
expected_db.push_back(ToString(i));
}
std::sort(expected_db.begin(), expected_db.end());
Iterator* itr = db_->NewIterator(ReadOptions());
itr->SeekToFirst();
for (auto x : expected_db) {
ASSERT_TRUE(itr->Valid());
ASSERT_EQ(itr->key().ToString(), x);
ASSERT_EQ(itr->value().ToString(), x);
itr->Next();
}
ASSERT_TRUE(!itr->Valid());
delete itr;
HistogramData hist_data;
options.statistics->histogramData(DB_WRITE, &hist_data);
ASSERT_GT(hist_data.average, 0.0);
} while (ChangeOptions(kSkipNoSeekToLast));
}
namespace {
typedef std::map<std::string, std::string> KVMap;
}
class ModelDB : public DB {
public:
class ModelSnapshot : public Snapshot {
public:
KVMap map_;
virtual SequenceNumber GetSequenceNumber() const override {
// no need to call this
assert(false);
return 0;
}
};
explicit ModelDB(const Options& options) : options_(options) {}
using DB::Put;
virtual Status Put(const WriteOptions& o, ColumnFamilyHandle* cf,
const Slice& k, const Slice& v) override {
WriteBatch batch;
batch.Put(cf, k, v);
return Write(o, &batch);
}
using DB::Delete;
virtual Status Delete(const WriteOptions& o, ColumnFamilyHandle* cf,
const Slice& key) override {
WriteBatch batch;
batch.Delete(cf, key);
return Write(o, &batch);
}
using DB::SingleDelete;
virtual Status SingleDelete(const WriteOptions& o, ColumnFamilyHandle* cf,
const Slice& key) override {
WriteBatch batch;
batch.SingleDelete(cf, key);
return Write(o, &batch);
}
using DB::Merge;
virtual Status Merge(const WriteOptions& o, ColumnFamilyHandle* cf,
const Slice& k, const Slice& v) override {
WriteBatch batch;
batch.Merge(cf, k, v);
return Write(o, &batch);
}
using DB::Get;
virtual Status Get(const ReadOptions& options, ColumnFamilyHandle* cf,
const Slice& key, PinnableSlice* value) override {
return Status::NotSupported(key);
}
using DB::MultiGet;
virtual std::vector<Status> MultiGet(
const ReadOptions& options,
const std::vector<ColumnFamilyHandle*>& column_family,
const std::vector<Slice>& keys,
std::vector<std::string>* values) override {
std::vector<Status> s(keys.size(),
Status::NotSupported("Not implemented."));
return s;
}
#ifndef ROCKSDB_LITE
using DB::IngestExternalFile;
virtual Status IngestExternalFile(
ColumnFamilyHandle* column_family,
const std::vector<std::string>& external_files,
const IngestExternalFileOptions& options) override {
return Status::NotSupported("Not implemented.");
}
virtual Status VerifyChecksum() override {
return Status::NotSupported("Not implemented.");
}
using DB::GetPropertiesOfAllTables;
virtual Status GetPropertiesOfAllTables(
ColumnFamilyHandle* column_family,
TablePropertiesCollection* props) override {
return Status();
}
virtual Status GetPropertiesOfTablesInRange(
ColumnFamilyHandle* column_family, const Range* range, std::size_t n,
TablePropertiesCollection* props) override {
return Status();
}
#endif // ROCKSDB_LITE
using DB::KeyMayExist;
virtual bool KeyMayExist(const ReadOptions& options,
ColumnFamilyHandle* column_family, const Slice& key,
std::string* value,
bool* value_found = nullptr) override {
if (value_found != nullptr) {
*value_found = false;
}
return true; // Not Supported directly
}
using DB::NewIterator;
virtual Iterator* NewIterator(const ReadOptions& options,
ColumnFamilyHandle* column_family) override {
if (options.snapshot == nullptr) {
KVMap* saved = new KVMap;
*saved = map_;
return new ModelIter(saved, true);
} else {
const KVMap* snapshot_state =
&(reinterpret_cast<const ModelSnapshot*>(options.snapshot)->map_);
return new ModelIter(snapshot_state, false);
}
}
virtual Status NewIterators(
const ReadOptions& options,
const std::vector<ColumnFamilyHandle*>& column_family,
std::vector<Iterator*>* iterators) override {
return Status::NotSupported("Not supported yet");
}
virtual const Snapshot* GetSnapshot() override {
ModelSnapshot* snapshot = new ModelSnapshot;
snapshot->map_ = map_;
return snapshot;
}
virtual void ReleaseSnapshot(const Snapshot* snapshot) override {
delete reinterpret_cast<const ModelSnapshot*>(snapshot);
}
virtual Status Write(const WriteOptions& options,
WriteBatch* batch) override {
class Handler : public WriteBatch::Handler {
public:
KVMap* map_;
virtual void Put(const Slice& key, const Slice& value) override {
(*map_)[key.ToString()] = value.ToString();
}
virtual void Merge(const Slice& key, const Slice& value) override {
// ignore merge for now
// (*map_)[key.ToString()] = value.ToString();
}
virtual void Delete(const Slice& key) override {
map_->erase(key.ToString());
}
};
Handler handler;
handler.map_ = &map_;
return batch->Iterate(&handler);
}
using DB::GetProperty;
virtual bool GetProperty(ColumnFamilyHandle* column_family,
const Slice& property, std::string* value) override {
return false;
}
using DB::GetIntProperty;
virtual bool GetIntProperty(ColumnFamilyHandle* column_family,
const Slice& property, uint64_t* value) override {
return false;
}
using DB::GetMapProperty;
virtual bool GetMapProperty(ColumnFamilyHandle* column_family,
const Slice& property,
std::map<std::string, double>* value) override {
return false;
}
using DB::GetAggregatedIntProperty;
virtual bool GetAggregatedIntProperty(const Slice& property,
uint64_t* value) override {
return false;
}
using DB::GetApproximateSizes;
virtual void GetApproximateSizes(ColumnFamilyHandle* column_family,
const Range* range, int n, uint64_t* sizes,
uint8_t include_flags
= INCLUDE_FILES) override {
for (int i = 0; i < n; i++) {
sizes[i] = 0;
}
}
using DB::GetApproximateMemTableStats;
virtual void GetApproximateMemTableStats(ColumnFamilyHandle* column_family,
const Range& range,
uint64_t* const count,
uint64_t* const size) override {
*count = 0;
*size = 0;
}
using DB::CompactRange;
virtual Status CompactRange(const CompactRangeOptions& options,
ColumnFamilyHandle* column_family,
const Slice* start, const Slice* end) override {
return Status::NotSupported("Not supported operation.");
}
virtual Status SetDBOptions(
const std::unordered_map<std::string, std::string>& new_options)
override {
return Status::NotSupported("Not supported operation.");
}
using DB::CompactFiles;
virtual Status CompactFiles(const CompactionOptions& compact_options,
ColumnFamilyHandle* column_family,
const std::vector<std::string>& input_file_names,
const int output_level,
const int output_path_id = -1) override {
return Status::NotSupported("Not supported operation.");
}
Status PauseBackgroundWork() override {
return Status::NotSupported("Not supported operation.");
}
Status ContinueBackgroundWork() override {
return Status::NotSupported("Not supported operation.");
}
Status EnableAutoCompaction(
const std::vector<ColumnFamilyHandle*>& column_family_handles) override {
return Status::NotSupported("Not supported operation.");
}
using DB::NumberLevels;
virtual int NumberLevels(ColumnFamilyHandle* column_family) override {
return 1;
}