// blob: 6c63aaaa622e05e791fc384bb2f8418f8db97d12 [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#include "kudu/cfile/bloomfile.h"
#include <cstdint>
#include <cstdlib>
#include <memory>
#include <ostream>
#include <utility>
#include <gflags/gflags_declare.h>
#include <glog/logging.h>
#include <gtest/gtest.h>
#include "kudu/cfile/bloomfile-test-base.h"
#include "kudu/cfile/cfile_util.h"
#include "kudu/fs/block_manager.h"
#include "kudu/fs/fs-test-util.h"
#include "kudu/fs/fs_manager.h"
#include "kudu/gutil/endian.h"
#include "kudu/util/bloom_filter.h"
#include "kudu/util/mem_tracker.h"
#include "kudu/util/slice.h"
#include "kudu/util/test_macros.h"
DECLARE_int32(bloom_size_bytes);
DECLARE_int32(n_keys);
DECLARE_double(fp_rate);
DECLARE_int64(benchmark_queries);
DECLARE_bool(benchmark_should_hit);
using kudu::fs::CountingReadableBlock;
using kudu::fs::ReadableBlock;
using std::shared_ptr;
using std::unique_ptr;
namespace kudu {
namespace cfile {
class BloomFileTest : public BloomFileTestBase {
protected:
void VerifyBloomFile() {
// Verify all the keys that we inserted probe as present.
for (uint64_t i = 0; i < FLAGS_n_keys; i++) {
uint64_t i_byteswapped = BigEndian::FromHost64(i << kKeyShift);
Slice s(reinterpret_cast<char *>(&i_byteswapped), sizeof(i));
bool present = false;
ASSERT_OK_FAST(bfr()->CheckKeyPresent(BloomKeyProbe(s), nullptr, &present));
ASSERT_TRUE(present);
}
int positive_count = 0;
// Check that the FP rate for keys we didn't insert is what we expect.
for (uint64_t i = 0; i < FLAGS_n_keys; i++) {
uint64_t key = random();
Slice s(reinterpret_cast<char *>(&key), sizeof(key));
bool present = false;
ASSERT_OK_FAST(bfr()->CheckKeyPresent(BloomKeyProbe(s), nullptr, &present));
if (present) {
positive_count++;
}
}
double fp_rate = static_cast<double>(positive_count) / FLAGS_n_keys;
LOG(INFO) << "fp_rate: " << fp_rate << "(" << positive_count << "/" << FLAGS_n_keys << ")";
ASSERT_LT(fp_rate, FLAGS_fp_rate + FLAGS_fp_rate * 0.20f)
<< "Should be no more than 1.2x the expected FP rate";
}
};
// Writes a test bloom file, opens it, and then checks its contents with
// VerifyBloomFile().
TEST_F(BloomFileTest, TestWriteAndRead) {
  NO_FATALS(WriteTestBloomFile());
  ASSERT_OK(OpenBloomFile());
  // VerifyBloomFile() relies on fatal assertions internally. Invoke it the
  // same way as WriteTestBloomFile() above so that a fatal failure inside the
  // helper also stops this test body instead of only returning from the
  // helper.
  NO_FATALS(VerifyBloomFile());
}
#ifdef NDEBUG
// Benchmark of bloom file lookups: writes and opens a test bloom file, runs
// ReadBenchmark(), logs the observed hit rate, and checks it against the
// expectation selected by FLAGS_benchmark_should_hit.
TEST_F(BloomFileTest, Benchmark) {
  NO_FATALS(WriteTestBloomFile());
  ASSERT_OK(OpenBloomFile());

  const uint64_t num_hits = ReadBenchmark();
  const double total_queries = static_cast<double>(FLAGS_benchmark_queries);
  const double hit_rate = static_cast<double>(num_hits) / total_queries;
  LOG(INFO) << "Hit Rate: " << hit_rate <<
      "(" << num_hits << "/" << FLAGS_benchmark_queries << ")";

  if (!FLAGS_benchmark_should_hit) {
    // Queries are expected to miss: only false positives may show up, with
    // 20% of slack on top of the configured rate.
    ASSERT_LT(hit_rate, FLAGS_fp_rate + FLAGS_fp_rate * 0.20f)
        << "Should be no more than 1.2x the expected FP rate";
    return;
  }

  // Queries are expected to hit: every single one must be reported present.
  ASSERT_EQ(num_hits, FLAGS_benchmark_queries);
}
#endif
// Checks the lazy-open path of BloomFileReader: OpenNoInit() must not read
// from the block, the first Init() must, a second Init() must not read
// again, and a regular Open() must read the same number of bytes as the
// lazy open followed by Init().
TEST_F(BloomFileTest, TestLazyInit) {
  NO_FATALS(WriteTestBloomFile());

  shared_ptr<MemTracker> mem_tracker = MemTracker::CreateTracker(-1, "test");
  int64_t mem_before_open = mem_tracker->consumption();

  // Wrap the underlying block in a "counting" block so that every byte read
  // through it is accumulated into 'bytes_counted'.
  unique_ptr<ReadableBlock> raw_block;
  ASSERT_OK(fs_manager()->OpenBlock(block_id(), &raw_block));
  size_t bytes_counted = 0;
  unique_ptr<ReadableBlock> counting_block(
      new CountingReadableBlock(std::move(raw_block), &bytes_counted));

  // A lazy open must not issue any reads, yet it should already account for
  // some memory in the tracker and be able to report the file size.
  unique_ptr<BloomFileReader> bloom_reader;
  ReaderOptions reader_opts;
  reader_opts.parent_mem_tracker = mem_tracker;
  ASSERT_OK(BloomFileReader::OpenNoInit(std::move(counting_block), reader_opts, &bloom_reader));
  ASSERT_EQ(0, bytes_counted);
  int64_t mem_after_lazy_open = mem_tracker->consumption();
  ASSERT_GT(mem_after_lazy_open, mem_before_open);
  ASSERT_GT(bloom_reader->FileSize(), 0);

  // The first Init() performs the reads; repeating it must not read anything
  // more. Tracked memory should have grown relative to the lazy open.
  ASSERT_OK(bloom_reader->Init(nullptr));
  ASSERT_GT(bytes_counted, 0);
  size_t bytes_after_first_init = bytes_counted;
  ASSERT_OK(bloom_reader->Init(nullptr));
  ASSERT_EQ(bytes_after_first_init, bytes_counted);
  ASSERT_GT(mem_tracker->consumption(), mem_after_lazy_open);

  // Finally, a non-lazy open of the same block should read exactly as many
  // bytes as the lazy open plus Init() did.
  ASSERT_OK(fs_manager()->OpenBlock(block_id(), &raw_block));
  bytes_counted = 0;
  counting_block.reset(new CountingReadableBlock(std::move(raw_block), &bytes_counted));
  ASSERT_OK(BloomFileReader::Open(std::move(counting_block), ReaderOptions(), &bloom_reader));
  ASSERT_EQ(bytes_after_first_init, bytes_counted);
}
} // namespace cfile
} // namespace kudu