src/kudu/util/block_bloom_filter-test.cc - kudu - Git at Google

 // Licensed to the Apache Software Foundation (ASF) under one
 // or more contributor license agreements.  See the NOTICE file
 // distributed with this work for additional information
 // regarding copyright ownership.  The ASF licenses this file
 // to you under the Apache License, Version 2.0 (the
 // "License"); you may not use this file except in compliance
 // with the License.  You may obtain a copy of the License at
 //
 //   http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing,
 // software distributed under the License is distributed on an
 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 // KIND, either express or implied.  See the License for the
 // specific language governing permissions and limitations
 // under the License.

 #include "kudu/util/block_bloom_filter.h"

 #include <cmath> // IWYU pragma: keep
 #include <cstdint>
 #include <cstdlib>
 #include <cstring>
 #include <memory>
 #include <type_traits>
 #include <unordered_set>
 #include <utility>
 #include <vector>

 #include <gflags/gflags_declare.h>
 #include <glog/logging.h>
 #include <gtest/gtest.h>

 #include "kudu/util/hash.pb.h"
 #include "kudu/util/memory/arena.h"
 #include "kudu/util/random.h"
 #include "kudu/util/slice.h"
 #include "kudu/util/status.h"
 #include "kudu/util/test_macros.h"
 #include "kudu/util/test_util.h"

 DECLARE_bool(disable_blockbloomfilter_avx2);

 using std::unique_ptr;
 using std::unordered_set;
 using std::vector;

 namespace kudu {

 class BlockBloomFilterTest : public KuduTest {
  public:
   void SetUp() override {
     SeedRandom();
     allocator_ = DefaultBlockBloomFilterBufferAllocator::GetSingleton();
   }
   // Make a random uint32_t, avoiding the absent high bit and the low-entropy low bits
   // produced by rand().
   static uint32_t MakeRand() {
     uint32_t result = (rand() >> 8) & 0xffff;
     result <<= 16;
     result |= (rand() >> 8) & 0xffff;
     return result;
   }

   BlockBloomFilter* CreateBloomFilter(size_t log_space_bytes) {
     FLAGS_disable_blockbloomfilter_avx2 = (MakeRand() & 0x1) == 0;

     unique_ptr<BlockBloomFilter> bf(new BlockBloomFilter(allocator_));
     CHECK_OK(bf->Init(log_space_bytes, FAST_HASH, 0));
     bloom_filters_.emplace_back(move(bf));
     return bloom_filters_.back().get();
   }

   void TearDown() override {
     for (const auto& bf : bloom_filters_) {
       bf->Close();
     }
   }

  protected:
   DefaultBlockBloomFilterBufferAllocator* allocator_;

  private:
   vector<unique_ptr<BlockBloomFilter>> bloom_filters_;
 };

 // We can construct (and destruct) Bloom filters with different spaces.
 TEST_F(BlockBloomFilterTest, Constructor) {
   for (int i = 0; i < 30; ++i) {
     CreateBloomFilter(i);
   }
 }

 TEST_F(BlockBloomFilterTest, Clone) {
   Arena arena(1024);
   ArenaBlockBloomFilterBufferAllocator arena_allocator(&arena);
   std::shared_ptr<BlockBloomFilterBufferAllocatorIf> allocator_clone = arena_allocator.Clone();

   for (int log_space_bytes = 1; log_space_bytes <= 20; ++log_space_bytes) {
     auto* bf = CreateBloomFilter(log_space_bytes);
     int max_elems = BlockBloomFilter::MaxNdv(log_space_bytes, 0.01 /* fpp */);
     while (max_elems-- > 0) {
       bf->Insert(MakeRand());
     }
     unique_ptr<BlockBloomFilter> bf_clone;
     ASSERT_OK(bf->Clone(allocator_clone.get(), &bf_clone));
     ASSERT_NE(nullptr, bf_clone);
     ASSERT_EQ(*bf_clone, *bf);
   }
 }

 TEST_F(BlockBloomFilterTest, InvalidSpace) {
   BlockBloomFilter bf(allocator_);
   // Random number in the range [38, 64).
   const int log_space_bytes = 38 + rand() % (64 - 38);
   Status s = bf.Init(log_space_bytes, FAST_HASH, 0);
   ASSERT_TRUE(s.IsInvalidArgument());
   ASSERT_STR_CONTAINS(s.ToString(), "Bloom filter too large");
   bf.Close();
 }

 // Simple Arena allocator based test that would sometimes trigger
 // SIGSEGV due to misalignment of the "directory_" ptr on AVX operations
 // before adding support for 32/64 byte alignment in Arena allocator.
 // It simulates the allocation pattern in wire-protocol.cc.
 TEST_F(BlockBloomFilterTest, ArenaAligned) {
   Arena a(64);
   auto* allocator = a.NewObject<ArenaBlockBloomFilterBufferAllocator>(&a);
   auto* bf = a.NewObject<BlockBloomFilter>(allocator);
   bf->Init(6, FAST_HASH, 0);
   bool key = true;
   Slice s(reinterpret_cast<const uint8_t*>(&key), sizeof(key));
   bf->Insert(s);
   ASSERT_TRUE(bf->Find(s));
 }

 TEST_F(BlockBloomFilterTest, InvalidHashAlgorithm) {
   BlockBloomFilter bf(allocator_);
   Status s = bf.Init(4, UNKNOWN_HASH, 0);
   ASSERT_TRUE(s.IsInvalidArgument());
   ASSERT_STR_CONTAINS(s.ToString(), "Invalid/Unsupported hash algorithm");
 }

 // We can Insert() hashes into a Bloom filter with different spaces.
 TEST_F(BlockBloomFilterTest, Insert) {
   for (int i = 13; i < 17; ++i) {
     auto* bf = CreateBloomFilter(i);
     for (int k = 0; k < (1 << 15); ++k) {
       bf->Insert(MakeRand());
     }
   }
 }

 // After Insert()ing something into a Bloom filter, it can be found again immediately.
 TEST_F(BlockBloomFilterTest, Find) {
   for (int i = 13; i < 17; ++i) {
     auto* bf = CreateBloomFilter(i);
     for (int k = 0; k < (1 << 15); ++k) {
       const auto to_insert = MakeRand();
       bf->Insert(to_insert);
       EXPECT_TRUE(bf->Find(to_insert));
     }
   }
 }

 // After Insert()ing something into a Bloom filter, it can be found again much later.
 TEST_F(BlockBloomFilterTest, CumulativeFind) {
   for (int i = 5; i < 11; ++i) {
     vector<uint32_t> inserted;
     auto* bf = CreateBloomFilter(i);
     for (int k = 0; k < (1 << 10); ++k) {
       const uint32_t to_insert = MakeRand();
       inserted.push_back(to_insert);
       bf->Insert(to_insert);
       for (int n = 0; n < inserted.size(); ++n) {
         EXPECT_TRUE(bf->Find(inserted[n]));
       }
     }
   }
 }

 // The empirical false positives we find when looking for random items is with a constant
 // factor of the false positive probability the Bloom filter was constructed for.
 TEST_F(BlockBloomFilterTest, FindInvalid) {
   // We use a deterministic pseudorandom number generator with a set seed. The reason is
   // that with a run-dependent seed, there will always be inputs that can fail. That's a
   // potential argument for this to be a benchmark rather than a test, although the
   // measured quantity would be not time but deviation from predicted fpp.
   Random rgen(867 + 5309);
   static const int find_limit = 1 << 22;
   unordered_set<uint32_t> to_find;
   while (to_find.size() < find_limit) {
     to_find.insert(rgen.Next64());
   }
   static const int max_log_ndv = 19;
   unordered_set<uint32_t> to_insert;
   while (to_insert.size() < (1ULL << max_log_ndv)) {
     const auto candidate = rgen.Next64();
     if (to_find.find(candidate) == to_find.end()) {
       to_insert.insert(candidate);
     }
   }
   vector<uint32_t> shuffled_insert(to_insert.begin(), to_insert.end());
   for (int log_ndv = 12; log_ndv < max_log_ndv; ++log_ndv) {
     for (int log_fpp = 4; log_fpp < 12; ++log_fpp) {
       double fpp = 1.0 / (1 << log_fpp);
       const size_t ndv = 1 << log_ndv;
       const int log_heap_space = BlockBloomFilter::MinLogSpace(ndv, fpp);
       auto* bf = CreateBloomFilter(log_heap_space);
       // Fill up a BF with exactly as much ndv as we planned for it:
       for (size_t i = 0; i < ndv; ++i) {
         bf->Insert(shuffled_insert[i]);
       }
       int found = 0;
       // Now we sample from the set of possible hashes, looking for hits.
       for (const auto& i : to_find) {
         found += bf->Find(i);
       }
       EXPECT_LE(found, find_limit * fpp * 2)
           << "Too many false positives with -log2(fpp) = " << log_fpp
           << " and log_ndv = " << log_ndv << " and log_heap_space = " << log_heap_space;
       // Because the space is rounded up to a power of 2, we might actually get a lower
       // fpp than the one passed to MinLogSpace().
       const double expected_fpp = BlockBloomFilter::FalsePositiveProb(ndv, log_heap_space);
       // Fudge factors are present because filter characteristics are true in the limit,
       // and will deviate for small samples.
       EXPECT_GE(found, find_limit * expected_fpp * 0.75)
           << "Too few false positives with -log2(fpp) = " << log_fpp
           << " expected_fpp = " << expected_fpp;
       EXPECT_LE(found, find_limit * expected_fpp * 1.25)
           << "Too many false positives with -log2(fpp) = " << log_fpp
           << " expected_fpp = " << expected_fpp;
     }
   }
 }

 // Test that MaxNdv() and MinLogSpace() are dual
 TEST_F(BlockBloomFilterTest, MinSpaceMaxNdv) {
   for (int log2fpp = -2; log2fpp >= -30; --log2fpp) {
     const double fpp = pow(2, log2fpp);
     for (int given_log_space = 8; given_log_space < 30; ++given_log_space) {
       const size_t derived_ndv = BlockBloomFilter::MaxNdv(given_log_space, fpp);
       // If NO values can be added without exceeding fpp, then the space needed is
       // trivially zero. This becomes a useless test; skip to the next iteration.
       if (0 == derived_ndv) continue;
       int derived_log_space = BlockBloomFilter::MinLogSpace(derived_ndv, fpp);

       EXPECT_EQ(derived_log_space, given_log_space) << "fpp: " << fpp
                                                     << " derived_ndv: " << derived_ndv;

       // If we lower the fpp, we need more space; if we raise it we need less.
       derived_log_space = BlockBloomFilter::MinLogSpace(derived_ndv, fpp / 2);
       EXPECT_GE(derived_log_space, given_log_space) << "fpp: " << fpp
                                                     << " derived_ndv: " << derived_ndv;
       derived_log_space = BlockBloomFilter::MinLogSpace(derived_ndv, fpp * 2);
       EXPECT_LE(derived_log_space, given_log_space) << "fpp: " << fpp
                                                     << " derived_ndv: " << derived_ndv;
     }
     for (size_t given_ndv = 1000; given_ndv < 1000 * 1000; given_ndv *= 3) {
       const int derived_log_space = BlockBloomFilter::MinLogSpace(given_ndv, fpp);
       const size_t derived_ndv = BlockBloomFilter::MaxNdv(derived_log_space, fpp);

       // The max ndv is close to, but larger than, then ndv we asked for
       EXPECT_LE(given_ndv, derived_ndv) << "fpp: " << fpp
                                         << " derived_log_space: " << derived_log_space;
       EXPECT_GE(given_ndv * 2, derived_ndv)
           << "fpp: " << fpp << " derived_log_space: " << derived_log_space;

       // Changing the fpp changes the ndv capacity in the expected direction.
       size_t new_derived_ndv = BlockBloomFilter::MaxNdv(derived_log_space, fpp / 2);
       EXPECT_GE(derived_ndv, new_derived_ndv)
           << "fpp: " << fpp << " derived_log_space: " << derived_log_space;
       new_derived_ndv = BlockBloomFilter::MaxNdv(derived_log_space, fpp * 2);
       EXPECT_LE(derived_ndv, new_derived_ndv)
           << "fpp: " << fpp << " derived_log_space: " << derived_log_space;
     }
   }
 }

 TEST_F(BlockBloomFilterTest, MinSpaceEdgeCase) {
   int min_space = BlockBloomFilter::MinLogSpace(1, 0.75);
   EXPECT_GE(min_space, 0) << "LogSpace should always be >= 0";
 }

 // Check that MinLogSpace() and FalsePositiveProb() are dual
 TEST_F(BlockBloomFilterTest, MinSpaceForFpp) {
   for (size_t ndv = 10000; ndv < 100 * 1000 * 1000; ndv *= 1.1) {
     for (double fpp = 0.1; fpp > pow(2, -20); fpp *= 0.9) { // NOLINT: loop on double
       // When contructing a Bloom filter, we can request a particular fpp by calling
       // MinLogSpace().
       const int min_log_space = BlockBloomFilter::MinLogSpace(ndv, fpp);
       // However, at the resulting ndv and space, the expected fpp might be lower than
       // the one that was requested.
       double expected_fpp = BlockBloomFilter::FalsePositiveProb(ndv, min_log_space);
       EXPECT_LE(expected_fpp, fpp);
       // The fpp we get might be much lower than the one we asked for. However, if the
       // space were just one size smaller, the fpp we get would be larger than the one we
       // asked for.
       expected_fpp = BlockBloomFilter::FalsePositiveProb(ndv, min_log_space - 1);
       EXPECT_GE(expected_fpp, fpp);
       // Therefore, the return value of MinLogSpace() is actually the minimum
       // log space at which we can guarantee the requested fpp.
     }
   }
 }

 TEST_F(BlockBloomFilterTest, Or) {
   BlockBloomFilter* bf1 = CreateBloomFilter(BlockBloomFilter::MinLogSpace(100, 0.01));
   BlockBloomFilter* bf2 = CreateBloomFilter(BlockBloomFilter::MinLogSpace(100, 0.01));

   for (int i = 60; i < 80; ++i) bf2->Insert(i);
   for (int i = 0; i < 10; ++i) bf1->Insert(i);

   ASSERT_OK(bf1->Or(*bf2));
   for (int i = 0; i < 10; ++i) ASSERT_TRUE(bf1->Find(i)) << i;
   for (int i = 60; i < 80; ++i) ASSERT_TRUE(bf1->Find(i)) << i;

   // Insert another value to aggregated BloomFilter.
   for (int i = 11; i < 50; ++i) bf1->Insert(i);

   for (int i = 11; i < 50; ++i) ASSERT_TRUE(bf1->Find(i)) << i;
   ASSERT_FALSE(bf1->Find(81));

   // Check that AlwaysFalse() is updated correctly.
   BlockBloomFilter* bf3 = CreateBloomFilter(BlockBloomFilter::MinLogSpace(100, 0.01));
   BlockBloomFilter* always_false = CreateBloomFilter(BlockBloomFilter::MinLogSpace(100, 0.01));
   ASSERT_OK(bf3->Or(*always_false));
   EXPECT_TRUE(bf3->always_false());
   ASSERT_OK(bf3->Or(*bf2));
   EXPECT_FALSE(bf3->always_false());

   // Invalid argument test cases.
   BlockBloomFilter* bf4 = CreateBloomFilter(BlockBloomFilter::MinLogSpace(100, 0.01));
   BlockBloomFilter* bf5 = CreateBloomFilter(BlockBloomFilter::MinLogSpace(100000, 0.01));
   Status s = bf4->Or(*bf5);
   ASSERT_TRUE(s.IsInvalidArgument());
   ASSERT_STR_CONTAINS(s.ToString(), "Directory size don't match");

   // Test the public OrEqualArray() function.
   static constexpr size_t kNumBytes = 64;
   unique_ptr<uint8_t[]> a_ptr(new uint8_t[kNumBytes]);
   unique_ptr<uint8_t[]> b_ptr(new uint8_t[kNumBytes]);
   memset(a_ptr.get(), 0xDE, kNumBytes);
   memset(b_ptr.get(), 0, kNumBytes);
   ASSERT_OK(BlockBloomFilter::OrEqualArray(kNumBytes, a_ptr.get(), b_ptr.get()));
   ASSERT_EQ(0, memcmp(a_ptr.get(), b_ptr.get(), kNumBytes));
 }
 }  // namespace kudu
	// Licensed to the Apache Software Foundation (ASF) under one
	// or more contributor license agreements. See the NOTICE file
	// distributed with this work for additional information
	// regarding copyright ownership. The ASF licenses this file
	// to you under the Apache License, Version 2.0 (the
	// "License"); you may not use this file except in compliance
	// with the License. You may obtain a copy of the License at
	//
	// http://www.apache.org/licenses/LICENSE-2.0
	//
	// Unless required by applicable law or agreed to in writing,
	// software distributed under the License is distributed on an
	// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
	// KIND, either express or implied. See the License for the
	// specific language governing permissions and limitations
	// under the License.

	#include "kudu/util/block_bloom_filter.h"

	#include <cmath> // IWYU pragma: keep
	#include <cstdint>
	#include <cstdlib>
	#include <cstring>
	#include <memory>
	#include <type_traits>
	#include <unordered_set>
	#include <utility>
	#include <vector>

	#include <gflags/gflags_declare.h>
	#include <glog/logging.h>
	#include <gtest/gtest.h>

	#include "kudu/util/hash.pb.h"
	#include "kudu/util/memory/arena.h"
	#include "kudu/util/random.h"
	#include "kudu/util/slice.h"
	#include "kudu/util/status.h"
	#include "kudu/util/test_macros.h"
	#include "kudu/util/test_util.h"

	DECLARE_bool(disable_blockbloomfilter_avx2);

	using std::unique_ptr;
	using std::unordered_set;
	using std::vector;

	namespace kudu {

	class BlockBloomFilterTest : public KuduTest {
	public:
	void SetUp() override {
	SeedRandom();
	allocator_ = DefaultBlockBloomFilterBufferAllocator::GetSingleton();
	}
	// Make a random uint32_t, avoiding the absent high bit and the low-entropy low bits
	// produced by rand().
	static uint32_t MakeRand() {
	uint32_t result = (rand() >> 8) & 0xffff;
	result <<= 16;
	result \|= (rand() >> 8) & 0xffff;
	return result;
	}

	BlockBloomFilter* CreateBloomFilter(size_t log_space_bytes) {
	FLAGS_disable_blockbloomfilter_avx2 = (MakeRand() & 0x1) == 0;

	unique_ptr<BlockBloomFilter> bf(new BlockBloomFilter(allocator_));
	CHECK_OK(bf->Init(log_space_bytes, FAST_HASH, 0));
	bloom_filters_.emplace_back(move(bf));
	return bloom_filters_.back().get();
	}

	void TearDown() override {
	for (const auto& bf : bloom_filters_) {
	bf->Close();
	}
	}

	protected:
	DefaultBlockBloomFilterBufferAllocator* allocator_;

	private:
	vector<unique_ptr<BlockBloomFilter>> bloom_filters_;
	};

	// We can construct (and destruct) Bloom filters with different spaces.
	TEST_F(BlockBloomFilterTest, Constructor) {
	for (int i = 0; i < 30; ++i) {
	CreateBloomFilter(i);
	}
	}

	TEST_F(BlockBloomFilterTest, Clone) {
	Arena arena(1024);
	ArenaBlockBloomFilterBufferAllocator arena_allocator(&arena);
	std::shared_ptr<BlockBloomFilterBufferAllocatorIf> allocator_clone = arena_allocator.Clone();

	for (int log_space_bytes = 1; log_space_bytes <= 20; ++log_space_bytes) {
	auto* bf = CreateBloomFilter(log_space_bytes);
	int max_elems = BlockBloomFilter::MaxNdv(log_space_bytes, 0.01 /* fpp */);
	while (max_elems-- > 0) {
	bf->Insert(MakeRand());
	}
	unique_ptr<BlockBloomFilter> bf_clone;
	ASSERT_OK(bf->Clone(allocator_clone.get(), &bf_clone));
	ASSERT_NE(nullptr, bf_clone);
	ASSERT_EQ(bf_clone, bf);
	}
	}

	TEST_F(BlockBloomFilterTest, InvalidSpace) {
	BlockBloomFilter bf(allocator_);
	// Random number in the range [38, 64).
	const int log_space_bytes = 38 + rand() % (64 - 38);
	Status s = bf.Init(log_space_bytes, FAST_HASH, 0);
	ASSERT_TRUE(s.IsInvalidArgument());
	ASSERT_STR_CONTAINS(s.ToString(), "Bloom filter too large");
	bf.Close();
	}

	// Simple Arena allocator based test that would sometimes trigger
	// SIGSEGV due to misalignment of the "directory_" ptr on AVX operations
	// before adding support for 32/64 byte alignment in Arena allocator.
	// It simulates the allocation pattern in wire-protocol.cc.
	TEST_F(BlockBloomFilterTest, ArenaAligned) {
	Arena a(64);
	auto* allocator = a.NewObject<ArenaBlockBloomFilterBufferAllocator>(&a);
	auto* bf = a.NewObject<BlockBloomFilter>(allocator);
	bf->Init(6, FAST_HASH, 0);
	bool key = true;
	Slice s(reinterpret_cast<const uint8_t*>(&key), sizeof(key));
	bf->Insert(s);
	ASSERT_TRUE(bf->Find(s));
	}

	TEST_F(BlockBloomFilterTest, InvalidHashAlgorithm) {
	BlockBloomFilter bf(allocator_);
	Status s = bf.Init(4, UNKNOWN_HASH, 0);
	ASSERT_TRUE(s.IsInvalidArgument());
	ASSERT_STR_CONTAINS(s.ToString(), "Invalid/Unsupported hash algorithm");
	}

	// We can Insert() hashes into a Bloom filter with different spaces.
	TEST_F(BlockBloomFilterTest, Insert) {
	for (int i = 13; i < 17; ++i) {
	auto* bf = CreateBloomFilter(i);
	for (int k = 0; k < (1 << 15); ++k) {
	bf->Insert(MakeRand());
	}
	}
	}

	// After Insert()ing something into a Bloom filter, it can be found again immediately.
	TEST_F(BlockBloomFilterTest, Find) {
	for (int i = 13; i < 17; ++i) {
	auto* bf = CreateBloomFilter(i);
	for (int k = 0; k < (1 << 15); ++k) {
	const auto to_insert = MakeRand();
	bf->Insert(to_insert);
	EXPECT_TRUE(bf->Find(to_insert));
	}
	}
	}

	// After Insert()ing something into a Bloom filter, it can be found again much later.
	TEST_F(BlockBloomFilterTest, CumulativeFind) {
	for (int i = 5; i < 11; ++i) {
	vector<uint32_t> inserted;
	auto* bf = CreateBloomFilter(i);
	for (int k = 0; k < (1 << 10); ++k) {
	const uint32_t to_insert = MakeRand();
	inserted.push_back(to_insert);
	bf->Insert(to_insert);
	for (int n = 0; n < inserted.size(); ++n) {
	EXPECT_TRUE(bf->Find(inserted[n]));
	}
	}
	}
	}

	// The empirical false positives we find when looking for random items is with a constant
	// factor of the false positive probability the Bloom filter was constructed for.
	TEST_F(BlockBloomFilterTest, FindInvalid) {
	// We use a deterministic pseudorandom number generator with a set seed. The reason is
	// that with a run-dependent seed, there will always be inputs that can fail. That's a
	// potential argument for this to be a benchmark rather than a test, although the
	// measured quantity would be not time but deviation from predicted fpp.
	Random rgen(867 + 5309);
	static const int find_limit = 1 << 22;
	unordered_set<uint32_t> to_find;
	while (to_find.size() < find_limit) {
	to_find.insert(rgen.Next64());
	}
	static const int max_log_ndv = 19;
	unordered_set<uint32_t> to_insert;
	while (to_insert.size() < (1ULL << max_log_ndv)) {
	const auto candidate = rgen.Next64();
	if (to_find.find(candidate) == to_find.end()) {
	to_insert.insert(candidate);
	}
	}
	vector<uint32_t> shuffled_insert(to_insert.begin(), to_insert.end());
	for (int log_ndv = 12; log_ndv < max_log_ndv; ++log_ndv) {
	for (int log_fpp = 4; log_fpp < 12; ++log_fpp) {
	double fpp = 1.0 / (1 << log_fpp);
	const size_t ndv = 1 << log_ndv;
	const int log_heap_space = BlockBloomFilter::MinLogSpace(ndv, fpp);
	auto* bf = CreateBloomFilter(log_heap_space);
	// Fill up a BF with exactly as much ndv as we planned for it:
	for (size_t i = 0; i < ndv; ++i) {
	bf->Insert(shuffled_insert[i]);
	}
	int found = 0;
	// Now we sample from the set of possible hashes, looking for hits.
	for (const auto& i : to_find) {
	found += bf->Find(i);
	}
	EXPECT_LE(found, find_limit * fpp * 2)
	<< "Too many false positives with -log2(fpp) = " << log_fpp
	<< " and log_ndv = " << log_ndv << " and log_heap_space = " << log_heap_space;
	// Because the space is rounded up to a power of 2, we might actually get a lower
	// fpp than the one passed to MinLogSpace().
	const double expected_fpp = BlockBloomFilter::FalsePositiveProb(ndv, log_heap_space);
	// Fudge factors are present because filter characteristics are true in the limit,
	// and will deviate for small samples.
	EXPECT_GE(found, find_limit * expected_fpp * 0.75)
	<< "Too few false positives with -log2(fpp) = " << log_fpp
	<< " expected_fpp = " << expected_fpp;
	EXPECT_LE(found, find_limit * expected_fpp * 1.25)
	<< "Too many false positives with -log2(fpp) = " << log_fpp
	<< " expected_fpp = " << expected_fpp;
	}
	}
	}

	// Test that MaxNdv() and MinLogSpace() are dual
	TEST_F(BlockBloomFilterTest, MinSpaceMaxNdv) {
	for (int log2fpp = -2; log2fpp >= -30; --log2fpp) {
	const double fpp = pow(2, log2fpp);
	for (int given_log_space = 8; given_log_space < 30; ++given_log_space) {
	const size_t derived_ndv = BlockBloomFilter::MaxNdv(given_log_space, fpp);
	// If NO values can be added without exceeding fpp, then the space needed is
	// trivially zero. This becomes a useless test; skip to the next iteration.
	if (0 == derived_ndv) continue;
	int derived_log_space = BlockBloomFilter::MinLogSpace(derived_ndv, fpp);

	EXPECT_EQ(derived_log_space, given_log_space) << "fpp: " << fpp
	<< " derived_ndv: " << derived_ndv;

	// If we lower the fpp, we need more space; if we raise it we need less.
	derived_log_space = BlockBloomFilter::MinLogSpace(derived_ndv, fpp / 2);
	EXPECT_GE(derived_log_space, given_log_space) << "fpp: " << fpp
	<< " derived_ndv: " << derived_ndv;
	derived_log_space = BlockBloomFilter::MinLogSpace(derived_ndv, fpp * 2);
	EXPECT_LE(derived_log_space, given_log_space) << "fpp: " << fpp
	<< " derived_ndv: " << derived_ndv;
	}
	for (size_t given_ndv = 1000; given_ndv < 1000 * 1000; given_ndv *= 3) {
	const int derived_log_space = BlockBloomFilter::MinLogSpace(given_ndv, fpp);
	const size_t derived_ndv = BlockBloomFilter::MaxNdv(derived_log_space, fpp);

	// The max ndv is close to, but larger than, then ndv we asked for
	EXPECT_LE(given_ndv, derived_ndv) << "fpp: " << fpp
	<< " derived_log_space: " << derived_log_space;
	EXPECT_GE(given_ndv * 2, derived_ndv)
	<< "fpp: " << fpp << " derived_log_space: " << derived_log_space;

	// Changing the fpp changes the ndv capacity in the expected direction.
	size_t new_derived_ndv = BlockBloomFilter::MaxNdv(derived_log_space, fpp / 2);
	EXPECT_GE(derived_ndv, new_derived_ndv)
	<< "fpp: " << fpp << " derived_log_space: " << derived_log_space;
	new_derived_ndv = BlockBloomFilter::MaxNdv(derived_log_space, fpp * 2);
	EXPECT_LE(derived_ndv, new_derived_ndv)
	<< "fpp: " << fpp << " derived_log_space: " << derived_log_space;
	}
	}
	}

	TEST_F(BlockBloomFilterTest, MinSpaceEdgeCase) {
	int min_space = BlockBloomFilter::MinLogSpace(1, 0.75);
	EXPECT_GE(min_space, 0) << "LogSpace should always be >= 0";
	}

	// Check that MinLogSpace() and FalsePositiveProb() are dual
	TEST_F(BlockBloomFilterTest, MinSpaceForFpp) {
	for (size_t ndv = 10000; ndv < 100 * 1000 * 1000; ndv *= 1.1) {
	for (double fpp = 0.1; fpp > pow(2, -20); fpp *= 0.9) { // NOLINT: loop on double
	// When contructing a Bloom filter, we can request a particular fpp by calling
	// MinLogSpace().
	const int min_log_space = BlockBloomFilter::MinLogSpace(ndv, fpp);
	// However, at the resulting ndv and space, the expected fpp might be lower than
	// the one that was requested.
	double expected_fpp = BlockBloomFilter::FalsePositiveProb(ndv, min_log_space);
	EXPECT_LE(expected_fpp, fpp);
	// The fpp we get might be much lower than the one we asked for. However, if the
	// space were just one size smaller, the fpp we get would be larger than the one we
	// asked for.
	expected_fpp = BlockBloomFilter::FalsePositiveProb(ndv, min_log_space - 1);
	EXPECT_GE(expected_fpp, fpp);
	// Therefore, the return value of MinLogSpace() is actually the minimum
	// log space at which we can guarantee the requested fpp.
	}
	}
	}

	TEST_F(BlockBloomFilterTest, Or) {
	BlockBloomFilter* bf1 = CreateBloomFilter(BlockBloomFilter::MinLogSpace(100, 0.01));
	BlockBloomFilter* bf2 = CreateBloomFilter(BlockBloomFilter::MinLogSpace(100, 0.01));

	for (int i = 60; i < 80; ++i) bf2->Insert(i);
	for (int i = 0; i < 10; ++i) bf1->Insert(i);

	ASSERT_OK(bf1->Or(*bf2));
	for (int i = 0; i < 10; ++i) ASSERT_TRUE(bf1->Find(i)) << i;
	for (int i = 60; i < 80; ++i) ASSERT_TRUE(bf1->Find(i)) << i;

	// Insert another value to aggregated BloomFilter.
	for (int i = 11; i < 50; ++i) bf1->Insert(i);

	for (int i = 11; i < 50; ++i) ASSERT_TRUE(bf1->Find(i)) << i;
	ASSERT_FALSE(bf1->Find(81));

	// Check that AlwaysFalse() is updated correctly.
	BlockBloomFilter* bf3 = CreateBloomFilter(BlockBloomFilter::MinLogSpace(100, 0.01));
	BlockBloomFilter* always_false = CreateBloomFilter(BlockBloomFilter::MinLogSpace(100, 0.01));
	ASSERT_OK(bf3->Or(*always_false));
	EXPECT_TRUE(bf3->always_false());
	ASSERT_OK(bf3->Or(*bf2));
	EXPECT_FALSE(bf3->always_false());

	// Invalid argument test cases.
	BlockBloomFilter* bf4 = CreateBloomFilter(BlockBloomFilter::MinLogSpace(100, 0.01));
	BlockBloomFilter* bf5 = CreateBloomFilter(BlockBloomFilter::MinLogSpace(100000, 0.01));
	Status s = bf4->Or(*bf5);
	ASSERT_TRUE(s.IsInvalidArgument());
	ASSERT_STR_CONTAINS(s.ToString(), "Directory size don't match");

	// Test the public OrEqualArray() function.
	static constexpr size_t kNumBytes = 64;
	unique_ptr<uint8_t[]> a_ptr(new uint8_t[kNumBytes]);
	unique_ptr<uint8_t[]> b_ptr(new uint8_t[kNumBytes]);
	memset(a_ptr.get(), 0xDE, kNumBytes);
	memset(b_ptr.get(), 0, kNumBytes);
	ASSERT_OK(BlockBloomFilter::OrEqualArray(kNumBytes, a_ptr.get(), b_ptr.get()));
	ASSERT_EQ(0, memcmp(a_ptr.get(), b_ptr.get(), kNumBytes));
	}
	} // namespace kudu