| /* |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| package org.apache.flink.runtime.operators.util; |
| |
| import org.apache.flink.core.memory.HeapMemorySegment; |
| import org.apache.flink.core.memory.MemorySegment; |
| |
| import org.apache.flink.core.memory.MemorySegmentFactory; |
| import org.junit.BeforeClass; |
| import org.junit.Test; |
| |
| import static org.junit.Assert.assertEquals; |
| import static org.junit.Assert.assertFalse; |
| import static org.junit.Assert.assertTrue; |
| |
| public class BloomFilterTest { |
| |
| private static BloomFilter bloomFilter; |
| private static final int INPUT_SIZE = 1024; |
| private static final double FALSE_POSITIVE_PROBABILITY = 0.05; |
| |
| @BeforeClass |
| public static void init() { |
| int bitsSize = BloomFilter.optimalNumOfBits(INPUT_SIZE, FALSE_POSITIVE_PROBABILITY); |
| bitsSize = bitsSize + (Long.SIZE - (bitsSize % Long.SIZE)); |
| int byteSize = bitsSize >>> 3; |
| MemorySegment memorySegment = MemorySegmentFactory.allocateUnpooledSegment(byteSize); |
| bloomFilter = new BloomFilter(INPUT_SIZE, byteSize); |
| bloomFilter.setBitsLocation(memorySegment, 0); |
| } |
| |
| @Test(expected = IllegalArgumentException.class) |
| public void testBloomFilterArguments1() { |
| new BloomFilter(-1, 128); |
| } |
| |
| @Test(expected = IllegalArgumentException.class) |
| public void testBloomFilterArguments2() { |
| new BloomFilter(0, 128); |
| } |
| |
| @Test(expected = IllegalArgumentException.class) |
| public void testBloomFilterArguments3() { |
| new BloomFilter(1024, -1); |
| } |
| |
| @Test(expected = IllegalArgumentException.class) |
| public void testBloomFilterArguments4() { |
| new BloomFilter(1024, 0); |
| } |
| |
| @Test(expected = IllegalArgumentException.class) |
| public void testBloomFilterArguments5() { |
| new BloomFilter(1024, 21); |
| } |
| |
| @Test |
| public void testBloomNumBits() { |
| assertEquals(0, BloomFilter.optimalNumOfBits(0, 0)); |
| assertEquals(0, BloomFilter.optimalNumOfBits(0, 1)); |
| assertEquals(0, BloomFilter.optimalNumOfBits(1, 1)); |
| assertEquals(7, BloomFilter.optimalNumOfBits(1, 0.03)); |
| assertEquals(72, BloomFilter.optimalNumOfBits(10, 0.03)); |
| assertEquals(729, BloomFilter.optimalNumOfBits(100, 0.03)); |
| assertEquals(7298, BloomFilter.optimalNumOfBits(1000, 0.03)); |
| assertEquals(72984, BloomFilter.optimalNumOfBits(10000, 0.03)); |
| assertEquals(729844, BloomFilter.optimalNumOfBits(100000, 0.03)); |
| assertEquals(7298440, BloomFilter.optimalNumOfBits(1000000, 0.03)); |
| assertEquals(6235224, BloomFilter.optimalNumOfBits(1000000, 0.05)); |
| } |
| |
| @Test |
| public void testBloomFilterNumHashFunctions() { |
| assertEquals(1, BloomFilter.optimalNumOfHashFunctions(-1, -1)); |
| assertEquals(1, BloomFilter.optimalNumOfHashFunctions(0, 0)); |
| assertEquals(1, BloomFilter.optimalNumOfHashFunctions(10, 0)); |
| assertEquals(1, BloomFilter.optimalNumOfHashFunctions(10, 10)); |
| assertEquals(7, BloomFilter.optimalNumOfHashFunctions(10, 100)); |
| assertEquals(1, BloomFilter.optimalNumOfHashFunctions(100, 100)); |
| assertEquals(1, BloomFilter.optimalNumOfHashFunctions(1000, 100)); |
| assertEquals(1, BloomFilter.optimalNumOfHashFunctions(10000, 100)); |
| assertEquals(1, BloomFilter.optimalNumOfHashFunctions(100000, 100)); |
| assertEquals(1, BloomFilter.optimalNumOfHashFunctions(1000000, 100)); |
| } |
| |
| @Test |
| public void testBloomFilterFalsePositiveProbability() { |
| assertEquals(7298440, BloomFilter.optimalNumOfBits(1000000, 0.03)); |
| assertEquals(6235224, BloomFilter.optimalNumOfBits(1000000, 0.05)); |
| assertEquals(4792529, BloomFilter.optimalNumOfBits(1000000, 0.1)); |
| assertEquals(3349834, BloomFilter.optimalNumOfBits(1000000, 0.2)); |
| assertEquals(2505911, BloomFilter.optimalNumOfBits(1000000, 0.3)); |
| assertEquals(1907139, BloomFilter.optimalNumOfBits(1000000, 0.4)); |
| |
| // Make sure the estimated fpp error is less than 1%. |
| assertTrue(Math.abs(BloomFilter.estimateFalsePositiveProbability(1000000, 7298440) - 0.03) < 0.01); |
| assertTrue(Math.abs(BloomFilter.estimateFalsePositiveProbability(1000000, 6235224) - 0.05) < 0.01); |
| assertTrue(Math.abs(BloomFilter.estimateFalsePositiveProbability(1000000, 4792529) - 0.1) < 0.01); |
| assertTrue(Math.abs(BloomFilter.estimateFalsePositiveProbability(1000000, 3349834) - 0.2) < 0.01); |
| assertTrue(Math.abs(BloomFilter.estimateFalsePositiveProbability(1000000, 2505911) - 0.3) < 0.01); |
| assertTrue(Math.abs(BloomFilter.estimateFalsePositiveProbability(1000000, 1907139) - 0.4) < 0.01); |
| } |
| |
| @Test |
| public void testHashcodeInput() { |
| bloomFilter.reset(); |
| int val1 = "val1".hashCode(); |
| int val2 = "val2".hashCode(); |
| int val3 = "val3".hashCode(); |
| int val4 = "val4".hashCode(); |
| int val5 = "val5".hashCode(); |
| |
| assertFalse(bloomFilter.testHash(val1)); |
| assertFalse(bloomFilter.testHash(val2)); |
| assertFalse(bloomFilter.testHash(val3)); |
| assertFalse(bloomFilter.testHash(val4)); |
| assertFalse(bloomFilter.testHash(val5)); |
| bloomFilter.addHash(val1); |
| assertTrue(bloomFilter.testHash(val1)); |
| assertFalse(bloomFilter.testHash(val2)); |
| assertFalse(bloomFilter.testHash(val3)); |
| assertFalse(bloomFilter.testHash(val4)); |
| assertFalse(bloomFilter.testHash(val5)); |
| bloomFilter.addHash(val2); |
| assertTrue(bloomFilter.testHash(val1)); |
| assertTrue(bloomFilter.testHash(val2)); |
| assertFalse(bloomFilter.testHash(val3)); |
| assertFalse(bloomFilter.testHash(val4)); |
| assertFalse(bloomFilter.testHash(val5)); |
| bloomFilter.addHash(val3); |
| assertTrue(bloomFilter.testHash(val1)); |
| assertTrue(bloomFilter.testHash(val2)); |
| assertTrue(bloomFilter.testHash(val3)); |
| assertFalse(bloomFilter.testHash(val4)); |
| assertFalse(bloomFilter.testHash(val5)); |
| bloomFilter.addHash(val4); |
| assertTrue(bloomFilter.testHash(val1)); |
| assertTrue(bloomFilter.testHash(val2)); |
| assertTrue(bloomFilter.testHash(val3)); |
| assertTrue(bloomFilter.testHash(val4)); |
| assertFalse(bloomFilter.testHash(val5)); |
| bloomFilter.addHash(val5); |
| assertTrue(bloomFilter.testHash(val1)); |
| assertTrue(bloomFilter.testHash(val2)); |
| assertTrue(bloomFilter.testHash(val3)); |
| assertTrue(bloomFilter.testHash(val4)); |
| assertTrue(bloomFilter.testHash(val5)); |
| } |
| } |