blob: 818af9c6398fffd32290c68010ca16de38491c50 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.cassandra.utils;
import java.io.*;
import java.nio.ByteBuffer;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Random;
import java.util.Set;
import org.junit.*;
import org.apache.cassandra.dht.IPartitioner;
import org.apache.cassandra.dht.Murmur3Partitioner;
import org.apache.cassandra.io.util.BufferedDataOutputStreamPlus;
import org.apache.cassandra.io.util.DataOutputBuffer;
import org.apache.cassandra.io.util.DataOutputStreamPlus;
import org.apache.cassandra.io.util.FileUtils;
import org.apache.cassandra.utils.IFilter.FilterKey;
import org.apache.cassandra.utils.KeyGenerator.RandomStringGenerator;
/**
 * Tests for the filters produced by {@link FilterFactory}.
 *
 * Most checks are run twice, once with {@code oldBfHashOrder = true} and once with
 * {@code oldBfHashOrder = false}, so that both hash orderings are exercised.
 */
public class BloomFilterTest
{
    /** Filter built in {@link #setup()} with {@code oldBfHashOrder = true}. */
    public IFilter bfOldFormat;

    /** Filter built in {@link #setup()} with {@code oldBfHashOrder = false}. */
    public IFilter bfInvHashes;

    public BloomFilterTest()
    {
    }

    /**
     * Adds the key "a" to {@code f}, round-trips the filter through
     * {@link FilterFactory#serialize} / {@link FilterFactory#deserialize} and verifies that the
     * deserialized copy reports "a" as present and "b" as absent.
     *
     * @param f              filter to serialize; it is mutated (the key "a" is added)
     * @param oldBfHashOrder hash order flag handed to the deserializer
     * @return the deserialized filter; the caller is responsible for closing it
     * @throws IOException if serialization or deserialization fails
     */
    public static IFilter testSerialize(IFilter f, boolean oldBfHashOrder) throws IOException
    {
        f.add(FilterTestHelper.bytes("a"));
        DataOutputBuffer out = new DataOutputBuffer();
        FilterFactory.serialize(f, out);

        ByteArrayInputStream in = new ByteArrayInputStream(out.getData(), 0, out.getLength());
        IFilter f2 = FilterFactory.deserialize(new DataInputStream(in), true, oldBfHashOrder);
        try
        {
            // JUnit assertions instead of the assert keyword: they are checked even without -ea.
            Assert.assertTrue(f2.isPresent(FilterTestHelper.bytes("a")));
            Assert.assertFalse(f2.isPresent(FilterTestHelper.bytes("b")));
        }
        catch (RuntimeException | Error e)
        {
            // The caller never receives f2 on failure, so release it here.
            f2.close();
            throw e;
        }
        return f2;
    }

    /** Creates a fresh pair of filters (one per hash order) before every test. */
    @Before
    public void setup()
    {
        bfOldFormat = FilterFactory.getFilter(10000L, FilterTestHelper.MAX_FAILURE_RATE, true, true);
        bfInvHashes = FilterFactory.getFilter(10000L, FilterTestHelper.MAX_FAILURE_RATE, true, false);
    }

    /** Closes both filters created by {@link #setup()}, even if closing the first one fails. */
    @After
    public void destroy()
    {
        try
        {
            bfOldFormat.close();
        }
        finally
        {
            bfInvHashes.close();
        }
    }

    /**
     * Asking for the best probability listed in {@code BloomCalculations.probs} for the largest
     * bucket count must succeed; asking for half of that probability must be rejected with an
     * {@link UnsupportedOperationException}.
     */
    @Test
    public void testBloomLimits1()
    {
        int maxBuckets = BloomCalculations.probs.length - 1;
        int maxK = BloomCalculations.probs[maxBuckets].length - 1;

        // possible: must not throw
        BloomCalculations.computeBloomSpec(maxBuckets, BloomCalculations.probs[maxBuckets][maxK]);

        // impossible, throws
        try
        {
            BloomCalculations.computeBloomSpec(maxBuckets, BloomCalculations.probs[maxBuckets][maxK] / 2);
            Assert.fail("computeBloomSpec accepted an unreachable false positive probability");
        }
        catch (UnsupportedOperationException expected)
        {
            // Only this second call is allowed to throw; that is what the test verifies.
        }
    }

    /** A single added key is reported present and a key that was never added is not. */
    @Test
    public void testOne()
    {
        bfOldFormat.add(FilterTestHelper.bytes("a"));
        Assert.assertTrue(bfOldFormat.isPresent(FilterTestHelper.bytes("a")));
        Assert.assertFalse(bfOldFormat.isPresent(FilterTestHelper.bytes("b")));

        bfInvHashes.add(FilterTestHelper.bytes("a"));
        Assert.assertTrue(bfInvHashes.isPresent(FilterTestHelper.bytes("a")));
        Assert.assertFalse(bfInvHashes.isPresent(FilterTestHelper.bytes("b")));
    }

    /** Runs the shared false positive check with int keys as the inserted set, for both filters. */
    @Test
    public void testFalsePositivesInt()
    {
        FilterTestHelper.testFalsePositives(bfOldFormat, FilterTestHelper.intKeys(), FilterTestHelper.randomKeys2());
        FilterTestHelper.testFalsePositives(bfInvHashes, FilterTestHelper.intKeys(), FilterTestHelper.randomKeys2());
    }

    /** Runs the shared false positive check with random keys as the inserted set, for both filters. */
    @Test
    public void testFalsePositivesRandom()
    {
        FilterTestHelper.testFalsePositives(bfOldFormat, FilterTestHelper.randomKeys(), FilterTestHelper.randomKeys2());
        FilterTestHelper.testFalsePositives(bfInvHashes, FilterTestHelper.randomKeys(), FilterTestHelper.randomKeys2());
    }

    /**
     * Runs the shared false positive check against the word generator, first with
     * {@code oldBfHashOrder = false} and then with {@code oldBfHashOrder = true}.
     * Does nothing when the word generator reports zero words.
     */
    @Test
    public void testWords()
    {
        if (KeyGenerator.WordGenerator.WORDS == 0)
        {
            return;
        }
        checkWordFalsePositives(false);
        checkWordFalsePositives(true);
    }

    /**
     * Builds a filter sized for half of the word list, runs the false positive check with two
     * word generators of step 2, and always closes the filter afterwards.
     */
    private static void checkWordFalsePositives(boolean oldBfHashOrder)
    {
        int skipEven = KeyGenerator.WordGenerator.WORDS % 2 == 0 ? 0 : 2;
        IFilter bf2 = FilterFactory.getFilter(KeyGenerator.WordGenerator.WORDS / 2, FilterTestHelper.MAX_FAILURE_RATE, true, oldBfHashOrder);
        try
        {
            FilterTestHelper.testFalsePositives(bf2,
                                                new KeyGenerator.WordGenerator(skipEven, 2),
                                                new KeyGenerator.WordGenerator(1, 2));
        }
        finally
        {
            bf2.close();
        }
    }

    /** Serialization round trip for both filters; the deserialized copies are closed right away. */
    @Test
    public void testSerialize() throws IOException
    {
        BloomFilterTest.testSerialize(bfOldFormat, true).close();
        BloomFilterTest.testSerialize(bfInvHashes, false).close();
    }

    /** Hash bucket collision check over random keys for both hash orders (currently ignored). */
    @Test
    @Ignore
    public void testManyRandom()
    {
        testManyRandom(FilterTestHelper.randomKeys(), false);
        testManyRandom(FilterTestHelper.randomKeys(), true);
    }

    /**
     * For every key, requests 128 hash buckets in the range 1024 * 1024 and counts how many of
     * them are duplicates. The total number of duplicates over all keys must not exceed 100.
     *
     * @param keys           keys to hash
     * @param oldBfHashOrder hash order flag used when creating the filter
     */
    private static void testManyRandom(Iterator<ByteBuffer> keys, boolean oldBfHashOrder)
    {
        final int maxHashCount = 128;
        Set<Long> hashes = new HashSet<>();
        long collisions = 0;
        while (keys.hasNext())
        {
            hashes.clear();
            FilterKey buf = FilterTestHelper.wrap(keys.next());
            // try-with-resources so the filter is released even if hashing throws
            try (BloomFilter bf = (BloomFilter) FilterFactory.getFilter(10, 1, false, oldBfHashOrder))
            {
                for (long hashIndex : bf.getHashBuckets(buf, maxHashCount, 1024 * 1024))
                {
                    hashes.add(hashIndex);
                }
            }
            // Every bucket index that was already in the set is one collision.
            collisions += (maxHashCount - hashes.size());
        }
        Assert.assertTrue("collisions=" + collisions, collisions <= 100);
    }

    /** Requesting a filter for an oversized key count must fail with {@link UnsupportedOperationException}. */
    @Test(expected = UnsupportedOperationException.class)
    public void testOffHeapException()
    {
        long numKeys = ((long) Integer.MAX_VALUE) * 64L + 1L; // 137,438,953,409 keys (just under 2^37)
        // close() is only reached if no exception is thrown, i.e. when the test is failing anyway.
        FilterFactory.getFilter(numKeys, 0.01d, true, true).close();
    }

    /** Cached and non-cached keys must set the same bits when {@code oldBfHashOrder = true}. */
    @Test
    public void compareCachedKeyOldHashOrder()
    {
        compareCachedKey(true);
    }

    /** Cached and non-cached keys must set the same bits when {@code oldBfHashOrder = false}. */
    @Test
    public void compareCachedKeyNewHashOrder()
    {
        compareCachedKey(false);
    }

    /**
     * Fills one filter with plainly wrapped keys and two further filters with the cached wrapper
     * of the same keys, then compares the resulting bitsets. All three filters are closed when
     * the check ends, whether it passes or fails.
     */
    private static void compareCachedKey(boolean oldBfHashOrder)
    {
        try (BloomFilter bf1 = (BloomFilter) FilterFactory.getFilter(FilterTestHelper.ELEMENTS / 2, FilterTestHelper.MAX_FAILURE_RATE, false, oldBfHashOrder);
             BloomFilter bf2 = (BloomFilter) FilterFactory.getFilter(FilterTestHelper.ELEMENTS / 2, FilterTestHelper.MAX_FAILURE_RATE, false, oldBfHashOrder);
             BloomFilter bf3 = (BloomFilter) FilterFactory.getFilter(FilterTestHelper.ELEMENTS / 2, FilterTestHelper.MAX_FAILURE_RATE, false, oldBfHashOrder))
        {
            RandomStringGenerator gen1 = new KeyGenerator.RandomStringGenerator(new Random().nextInt(), FilterTestHelper.ELEMENTS);

            // make sure all bitsets are empty.
            BitSetTest.compare(bf1.bitset, bf2.bitset);
            BitSetTest.compare(bf1.bitset, bf3.bitset);

            while (gen1.hasNext())
            {
                ByteBuffer key = gen1.next();
                FilterKey cached = FilterTestHelper.wrapCached(key);
                bf1.add(FilterTestHelper.wrap(key));
                // The same cached key instance is added to two filters.
                bf2.add(cached);
                bf3.add(cached);
            }

            BitSetTest.compare(bf1.bitset, bf2.bitset);
            BitSetTest.compare(bf1.bitset, bf3.bitset);
        }
    }

    /** File based serialization round trip of a very large filter for both hash orders (currently ignored). */
    @Test
    @Ignore
    public void testHugeBFSerialization() throws IOException
    {
        hugeBFSerialization(false);
        hugeBFSerialization(true);
    }

    /**
     * Creates a filter for {@code Integer.MAX_VALUE / 8 + 1} elements, adds one key, writes the
     * filter to a temporary file, reads it back and verifies that the key is still present.
     * Streams and filters are closed and the temporary file is removed even when a step fails.
     *
     * @param oldBfHashOrder hash order flag used for both creating and deserializing the filter
     * @throws IOException if writing or reading the temporary file fails
     */
    static void hugeBFSerialization(boolean oldBfHashOrder) throws IOException
    {
        ByteBuffer test = ByteBuffer.wrap(new byte[] {0, 1});

        File file = FileUtils.createTempFile("bloomFilterTest-", ".dat");
        try
        {
            try (BloomFilter filter = (BloomFilter) FilterFactory.getFilter(((long) Integer.MAX_VALUE / 8) + 1, 0.01d, true, oldBfHashOrder))
            {
                filter.add(FilterTestHelper.wrap(test));
                try (DataOutputStreamPlus out = new BufferedDataOutputStreamPlus(new FileOutputStream(file)))
                {
                    FilterFactory.serialize(filter, out);
                    // NOTE(review): the bitset is written a second time after the serialized filter;
                    // kept as in the original test - confirm whether the trailing copy is intentional.
                    filter.bitset.serialize(out);
                }
            }

            try (DataInputStream in = new DataInputStream(new FileInputStream(file));
                 BloomFilter filter2 = (BloomFilter) FilterFactory.deserialize(in, true, oldBfHashOrder))
            {
                Assert.assertTrue(filter2.isPresent(FilterTestHelper.wrap(test)));
            }
        }
        finally
        {
            // Best effort cleanup; a failed delete must not mask the test result.
            file.delete();
        }
    }

    /**
     * The filter hash of a key decorated by {@link Murmur3Partitioner} must equal the filter
     * hash of the same key wrapped by {@code FilterTestHelper.wrap}.
     */
    @Test
    public void testMurmur3FilterHash()
    {
        IPartitioner partitioner = new Murmur3Partitioner();
        Iterator<ByteBuffer> gen = new KeyGenerator.RandomStringGenerator(new Random().nextInt(), FilterTestHelper.ELEMENTS);
        long[] expected = new long[2];
        long[] actual = new long[2];
        while (gen.hasNext())
        {
            // Seed both arrays with different values so the comparison below can only
            // succeed if filterHash really overwrites them.
            expected[0] = 1;
            expected[1] = 2;
            actual[0] = 3;
            actual[1] = 4;

            ByteBuffer key = gen.next();
            FilterKey expectedKey = FilterTestHelper.wrap(key);
            FilterKey actualKey = partitioner.decorateKey(key);
            actualKey.filterHash(actual);
            expectedKey.filterHash(expected);
            Assert.assertArrayEquals(expected, actual);
        }
    }
}