PARQUET-1743: Add equals method to the BloomFilter (#731)
diff --git a/parquet-column/src/main/java/org/apache/parquet/column/values/bloomfilter/BlockSplitBloomFilter.java b/parquet-column/src/main/java/org/apache/parquet/column/values/bloomfilter/BlockSplitBloomFilter.java
index cc9f674..043c40e 100644
--- a/parquet-column/src/main/java/org/apache/parquet/column/values/bloomfilter/BlockSplitBloomFilter.java
+++ b/parquet-column/src/main/java/org/apache/parquet/column/values/bloomfilter/BlockSplitBloomFilter.java
@@ -28,6 +28,7 @@
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.nio.IntBuffer;
+import java.util.Arrays;
/*
* This Bloom filter is implemented using block-based Bloom filter algorithm from Putze et al.'s
@@ -356,4 +357,40 @@
public long hash(Binary value) {
return hashFunction.hashBytes(value.getBytes());
}
+
+ /**
+ * Compares this Bloom filter with the given object.
+ *
+ * Two filters are equal when their bitset arrays have identical contents and they share
+ * the same maximumBytes and hashStrategy values.
+ *
+ * @param object the object to compare against
+ * @return true if the object is an equivalent BlockSplitBloomFilter, false otherwise
+ */
+ @Override
+ public boolean equals(Object object) {
+ if (object == this) {
+ return true;
+ }
+ if (object instanceof BlockSplitBloomFilter) {
+ BlockSplitBloomFilter that = (BlockSplitBloomFilter) object;
+ return Arrays.equals(this.bitset, that.bitset)
+ && this.maximumBytes == that.maximumBytes
+ && this.hashStrategy == that.hashStrategy;
+ }
+ return false;
+ }
+
+ /**
+ * Returns a hash code derived from the same fields that equals compares, so that equal
+ * filters always report equal hash codes. The value depends on the current bitset
+ * contents and therefore changes whenever the bitset is modified.
+ *
+ * @return the hash code of this Bloom filter
+ */
+ @Override
+ public int hashCode() {
+ // Objects is fully qualified so that no additional import is required.
+ return java.util.Objects.hash(Arrays.hashCode(this.bitset), this.maximumBytes, this.hashStrategy);
+ }
}
diff --git a/parquet-column/src/main/java/org/apache/parquet/column/values/bloomfilter/BloomFilter.java b/parquet-column/src/main/java/org/apache/parquet/column/values/bloomfilter/BloomFilter.java
index 8b26c97..f1f5897 100644
--- a/parquet-column/src/main/java/org/apache/parquet/column/values/bloomfilter/BloomFilter.java
+++ b/parquet-column/src/main/java/org/apache/parquet/column/values/bloomfilter/BloomFilter.java
@@ -94,6 +94,14 @@
long getBitsetSize();
/**
+ * Compare this Bloom filter to the specified object.
+ *
+ * @param object the object to compare this Bloom filter with
+ * @return true if the given object represents a Bloom filter equivalent to this Bloom filter, false otherwise.
+ */
+ boolean equals(Object object);
+
+ /**
* Compute hash for int value by using its plain encoding result.
*
* @param value the value to hash
diff --git a/parquet-column/src/test/java/org/apache/parquet/column/values/bloomfilter/TestBlockSplitBloomFilter.java b/parquet-column/src/test/java/org/apache/parquet/column/values/bloomfilter/TestBlockSplitBloomFilter.java
index d75c0e2..1017302 100644
--- a/parquet-column/src/test/java/org/apache/parquet/column/values/bloomfilter/TestBlockSplitBloomFilter.java
+++ b/parquet-column/src/test/java/org/apache/parquet/column/values/bloomfilter/TestBlockSplitBloomFilter.java
@@ -34,6 +34,7 @@
import org.junit.rules.TemporaryFolder;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.assertNotEquals;
public class TestBlockSplitBloomFilter {
@Test
@@ -94,6 +95,25 @@
}
@Test
+ public void testEquals() {
+ // Two identically constructed filters that receive the same values must compare equal.
+ BloomFilter first = new BlockSplitBloomFilter(1024);
+ BloomFilter second = new BlockSplitBloomFilter(1024);
+ final String[] inserted = {"hello", "parquet", "bloom", "filter"};
+ for (int i = 0; i < inserted.length; i++) {
+ first.insertHash(first.hash(Binary.fromString(inserted[i])));
+ second.insertHash(second.hash(Binary.fromString(inserted[i])));
+ }
+ assertEquals(first, second);
+
+ // An identically constructed filter that receives only one of those values must not
+ // be equal to a filter that received all of them.
+ BloomFilter partial = new BlockSplitBloomFilter(1024);
+ partial.insertHash(partial.hash(Binary.fromString("parquet")));
+ assertNotEquals(second, partial);
+ }
+
+ @Test
public void testFPP() throws IOException {
final int totalCount = 100000;
final double FPP = 0.01;