blob: de603e50dc2a5af98b1426030c6292ef090a49e4 [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#include "kudu/cfile/bloomfile-test-base.h"
#include <utility>
#include <gflags/gflags.h>
#include <glog/logging.h>
#include <gtest/gtest.h>
#include "kudu/cfile/bloomfile.h"
#include "kudu/cfile/cfile_util.h"
#include "kudu/fs/block_manager.h"
#include "kudu/fs/fs_manager.h"
#include "kudu/gutil/endian.h"
#include "kudu/gutil/strings/substitute.h"
#include "kudu/util/bloom_filter.h"
#include "kudu/util/random.h"
#include "kudu/util/random_util.h"
#include "kudu/util/slice.h"
#include "kudu/util/stopwatch.h"
#include "kudu/util/test_macros.h"
#include "kudu/util/test_util.h"
// Command-line flags that parameterize the bloom file written by
// WriteTestBloomFile() and the probes issued by ReadBenchmark().

// Target size handed to BloomFilterSizing::BySizeAndFPRate() when the test
// bloom file is written.
DEFINE_int32(bloom_size_bytes, 4*1024, "Size of each bloom filter");
// Number of keys AppendBlooms() writes. ReadBenchmark() also passes this
// value to Random::Uniform() when choosing which key to probe.
DEFINE_int32(n_keys, 10*1000, "Number of keys to insert into the file");
// Target false positive rate handed to BloomFilterSizing::BySizeAndFPRate().
// NOTE(review): the default is the float literal 0.01F widened to double, so
// the stored default is not exactly 0.01 -- confirm whether that is intended.
DEFINE_double(fp_rate, 0.01F, "False positive rate to aim for");
// Number of CheckKeyPresent() calls ReadBenchmark() performs.
DEFINE_int64(benchmark_queries, 1000000, "Number of probes to benchmark");
// When false, ReadBenchmark() sets the low bit of every probed key so that it
// differs from every key written by AppendBlooms().
DEFINE_bool(benchmark_should_hit, false, "Set to true for the benchmark to query rows which match");
using std::unique_ptr;
namespace kudu {
namespace cfile {
void BloomFileTestBase::SetUp() {
KuduTest::SetUp();
fs_manager_.reset(new FsManager(env_, FsManagerOpts(GetTestPath("fs_root"))));
ASSERT_OK(fs_manager_->CreateInitialFileSystemLayout());
ASSERT_OK(fs_manager_->Open());
}
// Appends FLAGS_n_keys keys to 'bfw', one AppendKeys() call per key.
//
// Key number i is written as the big-endian encoding of (i << kKeyShift).
//
// This function reports problems through fatal gtest assertions, so callers
// should check for a fatal failure after it returns (for example by wrapping
// the call in ASSERT_NO_FATAL_FAILURE).
void BloomFileTestBase::AppendBlooms(BloomFileWriter* bfw) {
  // The loop counter below is unsigned. Comparing it directly against a
  // negative --n_keys would convert the flag to a huge unsigned bound and the
  // loop would effectively never finish, so reject negative values up front.
  ASSERT_GE(FLAGS_n_keys, 0) << "--n_keys must not be negative";
  const uint64_t num_keys = static_cast<uint64_t>(FLAGS_n_keys);

  // 'key_slice' points at 'key_buf', so each assignment to 'key_buf' inside
  // the loop changes the bytes that the slice exposes to AppendKeys().
  uint64_t key_buf = 0;
  Slice key_slice(reinterpret_cast<const uint8_t*>(&key_buf),
                  sizeof(key_buf));
  for (uint64_t i = 0; i < num_keys; i++) {
    // Shift the key left a bit so that while querying, we can
    // get a good mix of hits and misses while still staying within
    // the real key range.
    key_buf = BigEndian::FromHost64(i << kKeyShift);
    ASSERT_OK_FAST(bfw->AppendKeys(&key_slice, 1));
  }
}
void BloomFileTestBase::WriteTestBloomFile() {
unique_ptr<fs::WritableBlock> sink;
ASSERT_OK(fs_manager_->CreateNewBlock({}, &sink));
block_id_ = sink->id();
// Set sizing based on flags
BloomFilterSizing sizing = BloomFilterSizing::BySizeAndFPRate(
FLAGS_bloom_size_bytes, FLAGS_fp_rate);
ASSERT_NEAR(sizing.n_bytes(), FLAGS_bloom_size_bytes, FLAGS_bloom_size_bytes * 0.05);
ASSERT_GT(FLAGS_n_keys, sizing.expected_count())
<< "Invalid parameters: --n_keys isn't set large enough to fill even "
<< "one bloom filter of the requested --bloom_size_bytes";
BloomFileWriter bfw(std::move(sink), sizing);
ASSERT_OK(bfw.Start());
AppendBlooms(&bfw);
ASSERT_OK(bfw.Finish());
}
// Opens the block identified by 'block_id_' for reading and opens a
// BloomFileReader on it, storing the reader in 'bfr_'.
//
// Returns the first non-OK status from opening the block, otherwise the
// status returned by BloomFileReader::Open().
Status BloomFileTestBase::OpenBloomFile() {
  unique_ptr<fs::ReadableBlock> readable_block;
  RETURN_NOT_OK(fs_manager_->OpenBlock(block_id_, &readable_block));

  // Default-constructed reader options, as before.
  auto reader_opts = ReaderOptions();
  return BloomFileReader::Open(std::move(readable_block),
                               std::move(reader_opts),
                               &bfr_);
}
uint64_t BloomFileTestBase::ReadBenchmark() {
Random rng(GetRandomSeed32());
uint64_t count_present = 0;
LOG_TIMING(INFO, strings::Substitute("Running $0 queries", FLAGS_benchmark_queries)) {
for (uint64_t i = 0; i < FLAGS_benchmark_queries; i++) {
uint64_t key = rng.Uniform(FLAGS_n_keys);
key <<= kKeyShift;
if (!FLAGS_benchmark_should_hit) {
// Since the keys are bitshifted, setting the last bit
// ensures that none of the queries will match.
key |= 1;
}
key = BigEndian::FromHost64(key);
Slice s(reinterpret_cast<uint8_t *>(&key), sizeof(key));
bool present;
CHECK_OK(bfr_->CheckKeyPresent(BloomKeyProbe(s), nullptr, &present));
if (present) count_present++;
}
}
return count_present;
}
} // namespace cfile
} // namespace kudu