PARQUET-1743: Add equals method to the BloomFilter (#731)
diff --git a/parquet-column/src/main/java/org/apache/parquet/column/values/bloomfilter/BlockSplitBloomFilter.java b/parquet-column/src/main/java/org/apache/parquet/column/values/bloomfilter/BlockSplitBloomFilter.java index cc9f674..043c40e 100644 --- a/parquet-column/src/main/java/org/apache/parquet/column/values/bloomfilter/BlockSplitBloomFilter.java +++ b/parquet-column/src/main/java/org/apache/parquet/column/values/bloomfilter/BlockSplitBloomFilter.java
@@ -28,6 +28,7 @@ import java.nio.ByteBuffer; import java.nio.ByteOrder; import java.nio.IntBuffer; +import java.util.Arrays; /* * This Bloom filter is implemented using block-based Bloom filter algorithm from Putze et al.'s @@ -356,4 +357,18 @@ public long hash(Binary value) { return hashFunction.hashBytes(value.getBytes()); } + + @Override + public boolean equals(Object object) { + if (object == this) { + return true; + } + if (object instanceof BlockSplitBloomFilter) { + BlockSplitBloomFilter that = (BlockSplitBloomFilter) object; + return Arrays.equals(this.bitset, that.bitset) + && this.maximumBytes == that.maximumBytes + && this.hashStrategy == that.hashStrategy; + } + return false; + } }
diff --git a/parquet-column/src/main/java/org/apache/parquet/column/values/bloomfilter/BloomFilter.java b/parquet-column/src/main/java/org/apache/parquet/column/values/bloomfilter/BloomFilter.java index 8b26c97..f1f5897 100644 --- a/parquet-column/src/main/java/org/apache/parquet/column/values/bloomfilter/BloomFilter.java +++ b/parquet-column/src/main/java/org/apache/parquet/column/values/bloomfilter/BloomFilter.java
@@ -94,6 +94,14 @@ long getBitsetSize(); /** + * Compare this Bloom filter to the specified object. + * + * @param object the object to compare with this Bloom filter + * @return true if the given object represents a Bloom filter equivalent to this Bloom filter, false otherwise. + */ + boolean equals(Object object); + + /** * Compute hash for int value by using its plain encoding result. * * @param value the value to hash
diff --git a/parquet-column/src/test/java/org/apache/parquet/column/values/bloomfilter/TestBlockSplitBloomFilter.java b/parquet-column/src/test/java/org/apache/parquet/column/values/bloomfilter/TestBlockSplitBloomFilter.java index d75c0e2..1017302 100644 --- a/parquet-column/src/test/java/org/apache/parquet/column/values/bloomfilter/TestBlockSplitBloomFilter.java +++ b/parquet-column/src/test/java/org/apache/parquet/column/values/bloomfilter/TestBlockSplitBloomFilter.java
@@ -34,6 +34,7 @@ import org.junit.rules.TemporaryFolder; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; +import static org.junit.Assert.assertNotEquals; public class TestBlockSplitBloomFilter { @Test @@ -94,6 +95,25 @@ } @Test + public void testEquals() { + final String[] words = {"hello", "parquet", "bloom", "filter"}; + BloomFilter bloomFilterOne = new BlockSplitBloomFilter(1024); + BloomFilter bloomFilterTwo = new BlockSplitBloomFilter(1024); + + for (String word : words) { + bloomFilterOne.insertHash(bloomFilterOne.hash(Binary.fromString(word))); + bloomFilterTwo.insertHash(bloomFilterTwo.hash(Binary.fromString(word))); + } + + assertEquals(bloomFilterOne, bloomFilterTwo); + + BloomFilter bloomFilterThree = new BlockSplitBloomFilter(1024); + bloomFilterThree.insertHash(bloomFilterThree.hash(Binary.fromString("parquet"))); + + assertNotEquals(bloomFilterTwo, bloomFilterThree); + } + + @Test public void testFPP() throws IOException { final int totalCount = 100000; final double FPP = 0.01;