blob: 69bf67a1b49cc2db7c4c7c256872f7dbb75b5f9b [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.sling.resourceresolver.impl.mapping;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import java.math.BigInteger;
import java.util.HashSet;
import java.util.Random;
import org.junit.Test;
/**
* Test the bloom filter utility class.
*/
public class BloomFilterUtilsTest {
/**
* Program to calculate the best shift and multiply constants.
*/
public static void main(String... args) {
Random random = new Random(1);
HashSet<String> inSet = new HashSet<String>();
while (inSet.size() < 100) {
inSet.add(randomString(random));
}
Object[] in = inSet.toArray();
HashSet<String> notSet = new HashSet<String>();
while (notSet.size() < 10000) {
String k = randomString(random);
if (!inSet.contains(k)) {
notSet.add(k);
}
}
Object[] not = notSet.toArray();
int best = Integer.MAX_VALUE;
for (int mul = 1; mul < 100000; mul += 2) {
if (!BigInteger.valueOf(mul).isProbablePrime(10)) {
continue;
}
for (int shift = 0; shift < 32; shift++) {
byte[] bloom = BloomFilterUtils.createFilter(100, 64);
for (Object k : in) {
int h1 = hash(k.hashCode(), mul, shift), h2 = hash(h1, mul, shift);
add(bloom, h1, h2);
}
int falsePositives = 0;
for (Object k : not) {
int h1 = hash(k.hashCode(), mul, shift), h2 = hash(h1, mul, shift);
if (probablyContains(bloom, h1, h2)) {
falsePositives++;
// short false positives are bad
if (k.toString().length() < 4) {
falsePositives += 5;
}
if (falsePositives > best) {
break;
}
}
}
if (falsePositives < best) {
best = falsePositives;
System.out.println("mul: " + mul + " shift: "
+ shift + " falsePositives: " + best);
}
}
}
}
private static String randomString(Random r) {
if (r.nextInt(5) == 0) {
return randomName(r);
}
int length = 1 + Math.abs((int) r.nextGaussian() * 5);
if (r.nextBoolean()) {
length += r.nextInt(10);
}
char[] chars = new char[length];
for (int i = 0; i < length; i++) {
chars[i] = randomChar(r);
}
return new String(chars);
}
private static char randomChar(Random r) {
switch (r.nextInt(101) / 100) {
case 0:
case 1:
// 20% ascii
return (char) (32 + r.nextInt(127 - 32));
case 2:
case 3:
case 4:
case 5:
// 40% a-z
return (char) ('a' + r.nextInt('z' - 'a'));
case 6:
// 10% A-Z
return (char) ('A' + r.nextInt('Z' - 'A'));
case 7:
case 8:
// 20% 0-9
return (char) ('0' + r.nextInt('9' - '0'));
case 9:
// 10% aeiou
return "aeiou".charAt(r.nextInt("aeiou".length()));
}
// 1% unicode
return (char) r.nextInt(65535);
}
private static String randomName(Random r) {
int i = r.nextInt(1000);
// like TPC-C lastName, but lowercase
String[] n = {
"bar", "ought", "able", "pri", "pres", "ese", "anti",
"cally", "ation", "eing" };
StringBuilder buff = new StringBuilder();
buff.append(n[i / 100]);
buff.append(n[(i / 10) % 10]);
buff.append(n[i % 10]);
return buff.toString();
}
private static int hash(int oldHash, int mul, int shift) {
return oldHash ^ ((oldHash * mul) >> shift);
}
private static void add(byte[] bloom, int h1, int h2) {
int len = bloom.length;
if (len > 0) {
bloom[(h1 >>> 3) % len] |= 1 << (h1 & 7);
bloom[(h2 >>> 3) % len] |= 1 << (h2 & 7);
}
}
private static boolean probablyContains(byte[] bloom, int h1, int h2) {
int len = bloom.length;
if (len == 0) {
return true;
}
int x = bloom[(h1 >>> 3) % len] & (1 << (h1 & 7));
if (x != 0) {
x = bloom[(h2 >>> 3) % len] & (1 << (h2 & 7));
}
return x != 0;
}
@Test
public void size() {
byte[] bloom = BloomFilterUtils.createFilter(100, 64);
assertEquals(64, bloom.length);
bloom = BloomFilterUtils.createFilter(10, 64);
assertEquals(11, bloom.length);
bloom = BloomFilterUtils.createFilter(0, 64);
assertEquals(0, bloom.length);
bloom = BloomFilterUtils.createFilter(1, 64);
assertEquals(1, bloom.length);
}
@Test
public void probability() {
byte[] bloom = BloomFilterUtils.createFilter(20, 64);
System.out.println(bloom.length);
Random random = new Random(1);
random.setSeed(1);
for (int i = 0; i < 20; i++) {
BloomFilterUtils.add(bloom, random.nextInt());
}
random.setSeed(1);
for (int i = 0; i < 20; i++) {
assertTrue(BloomFilterUtils.probablyContains(bloom, random.nextInt()));
}
int falsePositives = 0;
for (int i = 20; i < 100000; i++) {
if (BloomFilterUtils.probablyContains(bloom, random.nextInt())) {
falsePositives++;
}
}
assertEquals(4594, falsePositives);
}
@Test
public void negativeHashCode() {
BloomFilterUtils.add(new byte[0], new Object() {
@Override
public int hashCode() {
return -1;
}
});
}
}