src/main/java/org/apache/datasketches/filters/bloomfilter/BloomFilter.java - datasketches-java - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
  * regarding copyright ownership.  The ASF licenses this file
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
  *
  *   http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing,
  * software distributed under the License is distributed on an
  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  * KIND, either express or implied.  See the License for the
  * specific language governing permissions and limitations
  * under the License.
  */

 package org.apache.datasketches.filters.bloomfilter;

 import static org.apache.datasketches.common.Util.LS;

 import java.nio.charset.StandardCharsets;

 import org.apache.datasketches.common.Family;
 import org.apache.datasketches.common.SketchesArgumentException;
 import org.apache.datasketches.common.SketchesStateException;
 import org.apache.datasketches.memory.Buffer;
 import org.apache.datasketches.memory.Memory;
 import org.apache.datasketches.memory.WritableBuffer;
 import org.apache.datasketches.memory.WritableMemory;
 import org.apache.datasketches.memory.XxHash;

 /**
  * <p>A Bloom filter is a data structure that can be used for probabilistic
  * set membership.</p>
  *
  * <p>When querying a Bloom filter, there are no false positives. Specifically:
  * When querying an item that has already been inserted to the filter, the filter will
  * always indicate that the item is present. There is a chance of false positives, where
  * querying an item that has never been presented to the filter will indicate that the
  * item has already been seen. Consequently, any query should be interpreted as
  * "might have seen."</p>
  *
  * <p>A standard Bloom filter is unlike typical sketches in that it is not sub-linear
  * in size and does not resize itself. A Bloom filter will work up to a target number of
  * distinct items, beyond which it will saturate and the false positive rate will start to
  * increase. The size of a Bloom filter will be linear in the expected number of
  * distinct items.</p>
  *
  * <p>See the BloomFilterBuilder class for methods to create a filter, especially
  * one sized correctly for a target number of distinct elements and a target
  * false positive probability.</p>
  *
  * <p>This implementation uses xxHash64 and follows the approach in Kirsch and Mitzenmacher,
  * "Less Hashing, Same Performance: Building a Better Bloom Filter," Wiley Interscience, 2008, pp. 187-218.</p>
  */
 public final class BloomFilter {
   /**
    * The maximum size of a bloom filter in bits.
    */
   public static final long MAX_SIZE_BITS = (Integer.MAX_VALUE - Family.BLOOMFILTER.getMaxPreLongs()) * (long) Long.SIZE;
   private static final int SER_VER = 1;
   private static final int EMPTY_FLAG_MASK = 4;
   private static final long BIT_ARRAY_OFFSET = 16;
   private static final int FLAGS_BYTE = 3;

   private final long seed_;            // hash seed
   private final short numHashes_;      // number of hash values
   private final BitArray bitArray_;    // the actual data bits
   private final WritableMemory wmem_;  // used only for direct mode BitArray

   /**
    * Creates a BloomFilter with given number of bits and number of hash functions,
    * and a user-specified seed.
    *
    * @param numBits The size of the BloomFilter, in bits
    * @param numHashes The number of hash functions to apply to items
    * @param seed The base hash seed
    */
   BloomFilter(final long numBits, final int numHashes, final long seed) {
     seed_ = seed;
     numHashes_ = (short) numHashes;
     bitArray_ = new HeapBitArray(numBits);
     wmem_ = null;
   }

   /**
    * Creates a BloomFilter with given number of bits and number of hash functions,
    * and a user-specified seed in the provided WritableMemory
    *
    * @param numBits The size of the BloomFilter, in bits
    * @param numHashes The number of hash functions to apply to items
    * @param seed The base hash seed
    * @param wmem A WritableMemory that will be initialized to hold the filter
    */
   BloomFilter(final long numBits, final int numHashes, final long seed, final WritableMemory wmem) {
     if (wmem.getCapacity() < Family.BLOOMFILTER.getMaxPreLongs()) {
       throw new SketchesArgumentException("Provided WritableMemory capacity insufficient to initialize BloomFilter");
     }

     // we don't resize so initialize wizth non-empty preLongs value
     // and no empty flag
     final WritableBuffer wbuf = wmem.asWritableBuffer();
     wbuf.putByte((byte) Family.BLOOMFILTER.getMaxPreLongs());
     wbuf.putByte((byte) SER_VER);
     wbuf.putByte((byte) Family.BLOOMFILTER.getID());
     wbuf.putByte((byte) 0); // instead of (bitArray_.isEmpty() ? EMPTY_FLAG_MASK : 0);
     wbuf.putShort((short) numHashes);
     wbuf.putShort((short) 0); // unused
     wbuf.putLong(seed);

     seed_ = seed;
     numHashes_ = (short) numHashes;
     bitArray_ = DirectBitArray.initialize(numBits, wmem.writableRegion(BIT_ARRAY_OFFSET, wmem.getCapacity() - BIT_ARRAY_OFFSET));
     wmem_ = wmem;
   }

   // Constructor used with internalHeapifyOrWrap()
   BloomFilter(final short numHashes, final long seed, final BitArray bitArray, final WritableMemory wmem) {
     seed_ = seed;
     numHashes_ = numHashes;
     bitArray_ = bitArray;
     wmem_ = wmem;
   }

   /**
    * Reads a serialized image of a BloomFilter from the provided Memory
    * @param mem Memory containing a previously serialized BloomFilter
    * @return a BloomFilter object
    */
   public static BloomFilter heapify(final Memory mem) {
     // casting to writable, but heapify so only reading
     return internalHeapifyOrWrap((WritableMemory) mem, false, false);
   }

   /**
    * Wraps the given Memory into this filter class.  The class itself only contains a few metadata items and holds
    * a reference to the Memory object, which contains all the data.
    * @param mem the given Memory object
    * @return the wrapping BloomFilter class.
    */
   public static BloomFilter wrap(final Memory mem) {
     // casting to writable, but tracking that the object is read-only
     return internalHeapifyOrWrap((WritableMemory) mem, true, false);
   }

   /**
    * Wraps the given WritableMemory into this filter class.  The class itself only contains a few metadata items and holds
    * a reference to the Memory object, which contains all the data.
    * @param wmem the given WritableMemory object
    * @return the wrapping BloomFilter class.
    */
   public static BloomFilter writableWrap(final WritableMemory wmem) {
     return internalHeapifyOrWrap(wmem, true, true);
   }

   private static BloomFilter internalHeapifyOrWrap(final WritableMemory wmem, final boolean isWrap, final boolean isWritable) {
     final Buffer buf = wmem.asBuffer();
     final int preLongs = buf.getByte();
     final int serVer = buf.getByte();
     final int familyID = buf.getByte();
     final int flags = buf.getByte();

     checkArgument(preLongs < Family.BLOOMFILTER.getMinPreLongs() || preLongs > Family.BLOOMFILTER.getMaxPreLongs(),
       "Possible corruption: Incorrect number of preamble bytes specified in header");
     checkArgument(serVer != SER_VER, "Possible corruption: Unrecognized serialization version: " + serVer);
     checkArgument(familyID != Family.BLOOMFILTER.getID(), "Possible corruption: Incorrect FamilyID for bloom filter. Found: " + familyID);

     final short numHashes = buf.getShort();
     buf.getShort(); // unused
     checkArgument(numHashes < 1, "Possible corruption: Need strictly positive number of hash functions. Found: " + numHashes);

     final long seed = buf.getLong();

     final boolean isEmpty = (flags & EMPTY_FLAG_MASK) != 0;

     final BitArray bitArray;
     if (isWrap) {
       if (isWritable) {
         bitArray = BitArray.writableWrap(wmem.writableRegion(BIT_ARRAY_OFFSET, wmem.getCapacity() - BIT_ARRAY_OFFSET), isEmpty);
       } else {
         bitArray = BitArray.wrap(wmem.region(BIT_ARRAY_OFFSET, wmem.getCapacity() - BIT_ARRAY_OFFSET), isEmpty);
       }
       return new BloomFilter(numHashes, seed, bitArray, wmem);
     } else { // if heapify
       bitArray = BitArray.heapify(buf, isEmpty);
       return new BloomFilter(numHashes, seed, bitArray, null);
     }
   }

   /**
    * Resets the BloomFilter to an empty state
    */
   public void reset() {
     bitArray_.reset();
   }

   /**
    * Checks if the BloomFilter has processed any items
    * @return True if the BloomFilter is empty, otherwise False
    */
   public boolean isEmpty() { return bitArray_.isEmpty(); }

   /**
    * Returns the number of bits in the BloomFilter that are set to 1.
    * @return The number of bits in use in this filter
    */
   public long getBitsUsed() { return bitArray_.getNumBitsSet(); }

   /**
    * Returns the total number of bits in the BloomFilter.
    * @return The total size of the BloomFilter
    */
   public long getCapacity() { return bitArray_.getCapacity(); }

   /**
    * Returns the configured number of hash functions for this BloomFilter
    * @return The number of hash functions to apply to inputs
    */
   public short getNumHashes() { return numHashes_; }

   /**
    * Returns the hash seed for this BloomFilter.
    * @return The hash seed for this filter
    */
   public long getSeed() { return seed_; }

   /**
    * Returns whether the filter has a backing Memory object
    * @return true if backed by Memory, otherwise false
    */
   public boolean hasMemory() { return wmem_ != null; }

   /**
    * Returns whether the filter is in read-only mode. That is possible
    * only if there is a backing Memory in read-only mode.
    * @return true if read-only, otherwise false
    */
   public boolean isReadOnly() {
     return wmem_ != null && bitArray_.isReadOnly();
   }

   /**
    * Returns whether the filter is a direct (off-heap) or on-heap object.
    * That is possible only if there is a backing Memory.
    * @return true if using direct memory access, otherwise false
    */
   public boolean isDirect() {
     return wmem_ != null && bitArray_.isDirect();
   }

   /**
    * Returns the percentage of all bits in the BloomFilter set to 1.
    * @return the percentage of bits in the filter set to 1
    */
   public double getFillPercentage() {
     return (double) bitArray_.getNumBitsSet() / bitArray_.getCapacity();
   }

   // UPDATE METHODS
   /**
    * Updates the filter with the provided long value.
    * @param item an item with which to update the filter
    */
   public void update(final long item) {
     final long h0 = XxHash.hashLong(item, seed_);
     final long h1 = XxHash.hashLong(item, h0);
     updateInternal(h0, h1);
   }

   /**
    * Updates the filter with the provided double value. The value is
    * canonicalized (NaN and infinities) prior to updating.
    * @param item an item with which to update the filter
    */
   public void update(final double item) {
     // canonicalize all NaN & +/- infinity forms
     final long[] data = { Double.doubleToLongBits(item) };
     final long h0 = XxHash.hashLongArr(data, 0, 1, seed_);
     final long h1 = XxHash.hashLongArr(data, 0, 1, h0);
     updateInternal(h0, h1);
   }

   /**
    * Updates the filter with the provided String.
    * The string is converted to a byte array using UTF8 encoding.
    *
    * <p>Note: this will not produce the same output hash values as the {@link #update(char[])}
    * method and will generally be a little slower depending on the complexity of the UTF8 encoding.
    * </p>
    *
    * @param item an item with which to update the filter
    */
   public void update(final String item) {
     if (item == null || item.isEmpty()) { return; }
     final byte[] strBytes = item.getBytes(StandardCharsets.UTF_8);
     final long h0 = XxHash.hashByteArr(strBytes, 0, strBytes.length, seed_);
     final long h1 = XxHash.hashByteArr(strBytes, 0, strBytes.length, h0);
     updateInternal(h0, h1);
   }

   /**
    * Updates the filter with the provided byte[].
    * @param data an array with which to update the filter
    */
   public void update(final byte[] data) {
     if (data == null) { return; }
     final long h0 = XxHash.hashByteArr(data, 0, data.length, seed_);
     final long h1 = XxHash.hashByteArr(data, 0, data.length, h0);
     updateInternal(h0, h1);
   }

   /**
    * Updates the filter with the provided char[].
    * @param data an array with which to update the filter
    */
   public void update(final char[] data) {
     if (data == null) { return; }
     final long h0 = XxHash.hashCharArr(data, 0, data.length, seed_);
     final long h1 = XxHash.hashCharArr(data, 0, data.length, h0);
     updateInternal(h0, h1);
   }

   /**
    * Updates the filter with the provided short[].
    * @param data an array with which to update the filter
    */
   public void update(final short[] data) {
     if (data == null) { return; }
     final long h0 = XxHash.hashShortArr(data, 0, data.length, seed_);
     final long h1 = XxHash.hashShortArr(data, 0, data.length, h0);
     updateInternal(h0, h1);
   }

   /**
    * Updates the filter with the provided int[].
    * @param data an array with which to update the filter
    */
   public void update(final int[] data) {
     if (data == null) { return; }
     final long h0 = XxHash.hashIntArr(data, 0, data.length, seed_);
     final long h1 = XxHash.hashIntArr(data, 0, data.length, h0);
     updateInternal(h0, h1);
   }

   /**
    * Updates the filter with the provided long[].
    * @param data an array with which to update the filter
    */
   public void update(final long[] data) {
     if (data == null) { return; }
     final long h0 = XxHash.hashLongArr(data, 0, data.length, seed_);
     final long h1 = XxHash.hashLongArr(data, 0, data.length, h0);
     updateInternal(h0, h1);
   }

   /**
    * Updates the filter with the data in the provided Memory.
    * @param mem a Memory object with which to update the filter
    */
   public void update(final Memory mem) {
     if (mem == null) { return; }
     final long h0 = mem.xxHash64(0, mem.getCapacity(), seed_);
     final long h1 = mem.xxHash64(0, mem.getCapacity(), h0);
     updateInternal(h0, h1);
   }

   // Internal method to apply updates given pre-computed hashes
   private void updateInternal(final long h0, final long h1) {
     final long numBits = bitArray_.getCapacity();
     for (int i = 1; i <= numHashes_; ++i) {
       // right-shift to ensure non-negative value
       final long hashIndex = ((h0 + i * h1) >>> 1) % numBits;
       bitArray_.setBit(hashIndex);
     }
   }

   // QUERY-AND-UPDATE METHODS

   /**
    * Updates the filter with the provided long and
    * returns the result from quering that value prior to the update.
    * @param item an item with which to update the filter
    * @return The query result prior to applying the update
    */
   public boolean queryAndUpdate(final long item) {
     final long h0 = XxHash.hashLong(item, seed_);
     final long h1 = XxHash.hashLong(item, h0);
     return queryAndUpdateInternal(h0, h1);
   }

   /**
    * Updates the filter with the provided double and
    * returns the result from quering that value prior to the update.
    * The double is canonicalized (NaN and +/- infinity) in the call.
    * @param item an item with which to update the filter
    * @return The query result prior to applying the update
    */
   public boolean queryAndUpdate(final double item) {
     // canonicalize all NaN & +/- infinity forms
     final long[] data = { Double.doubleToLongBits(item) };
     final long h0 = XxHash.hashLongArr(data, 0, 1, seed_);
     final long h1 = XxHash.hashLongArr(data, 0, 1, h0);
     return queryAndUpdateInternal(h0, h1);
   }

   /**
    * Updates the filter with the provided String and
    * returns the result from quering that value prior to the update.
    * The string is converted to a byte array using UTF8 encoding.
    *
    * <p>Note: this will not produce the same output hash values as the {@link #queryAndUpdate(char[])}
    * method and will generally be a little slower depending on the complexity of the UTF8 encoding.
    * </p>
    *
    * @param item an item with which to update the filter
    * @return The query result prior to applying the update, or false if item is null
    */
   public boolean queryAndUpdate(final String item) {
     if (item == null || item.isEmpty()) { return false; }
     final byte[] strBytes = item.getBytes(StandardCharsets.UTF_8);
     final long h0 = XxHash.hashByteArr(strBytes, 0, strBytes.length, seed_);
     final long h1 = XxHash.hashByteArr(strBytes, 0, strBytes.length, h0);
     return queryAndUpdateInternal(h0, h1);
   }

   /**
    * Updates the filter with the provided byte[] and
    * returns the result from quering that array prior to the update.
    * @param data an array with which to update the filter
    * @return The query result prior to applying the update, or false if data is null
    */
   public boolean queryAndUpdate(final byte[] data) {
     final long h0 = XxHash.hashByteArr(data, 0, data.length, seed_);
     final long h1 = XxHash.hashByteArr(data, 0, data.length, h0);
     return queryAndUpdateInternal(h0, h1);
   }

   /**
    * Updates the filter with the provided char[] and
    * returns the result from quering that array prior to the update.
    * @param data an array with which to update the filter
    * @return The query result prior to applying the update, or false if data is null
    */
   public boolean queryAndUpdate(final char[] data) {
     if (data == null) { return false; }
     final long h0 = XxHash.hashCharArr(data, 0, data.length, seed_);
     final long h1 = XxHash.hashCharArr(data, 0, data.length, h0);
     return queryAndUpdateInternal(h0, h1);
   }

   /**
    * Updates the filter with the provided short[] and
    * returns the result from quering that array prior to the update.
    * @param data an array with which to update the filter
    * @return The query result prior to applying the update, or false if data is null
    */
   public boolean queryAndUpdate(final short[] data) {
     if (data == null) { return false; }
     final long h0 = XxHash.hashShortArr(data, 0, data.length, seed_);
     final long h1 = XxHash.hashShortArr(data, 0, data.length, h0);
     return queryAndUpdateInternal(h0, h1);
   }

   /**
    * Updates the filter with the provided int[] and
    * returns the result from quering that array prior to the update.
    * @param data an array with which to update the filter
    * @return The query result prior to applying the update, or false if data is null
    */
   public boolean queryAndUpdate(final int[] data) {
     if (data == null) { return false; }
     final long h0 = XxHash.hashIntArr(data, 0, data.length, seed_);
     final long h1 = XxHash.hashIntArr(data, 0, data.length, h0);
     return queryAndUpdateInternal(h0, h1);
   }

   /**
    * Updates the filter with the provided long[] and
    * returns the result from quering that array prior to the update.
    * @param data an array with which to update the filter
    * @return The query result prior to applying the update, or false if data is null
    */
   public boolean queryAndUpdate(final long[] data) {
     if (data == null) { return false; }
     final long h0 = XxHash.hashLongArr(data, 0, data.length, seed_);
     final long h1 = XxHash.hashLongArr(data, 0, data.length, h0);
     return queryAndUpdateInternal(h0, h1);
   }

   /**
    * Updates the filter with the provided Memory and
    * returns the result from quering that Memory prior to the update.
    * @param mem an array with which to update the filter
    * @return The query result prior to applying the update, or false if mem is null
    */
   public boolean queryAndUpdate(final Memory mem) {
     if (mem == null) { return false; }
     final long h0 = mem.xxHash64(0, mem.getCapacity(), seed_);
     final long h1 = mem.xxHash64(0, mem.getCapacity(), h0);
     return queryAndUpdateInternal(h0, h1);
   }

   // Internal query-and-update method given pre-computed hashes
   private boolean queryAndUpdateInternal(final long h0, final long h1) {
     final long numBits = bitArray_.getCapacity();
     boolean valueAlreadyExists = true;
     for (int i = 1; i <= numHashes_; ++i) {
       final long hashIndex = ((h0 + i * h1) >>> 1) % numBits;
       // returns old value of bit
       valueAlreadyExists &= bitArray_.getAndSetBit(hashIndex);
     }
     return valueAlreadyExists;
   }

   // QUERY METHODS
   /**
    * Queries the filter with the provided long and returns whether the
    * value <em>might</em> have been seen previously. The filter's expected
    * False Positive Probability determines the chances of a true result being
    * a false positive. False negatives are never possible.
    * @param item an item with which to query the filter
    * @return The result of querying the filter with the given item
    */
   public boolean query(final long item) {
     final long h0 = XxHash.hashLong(item, seed_);
     final long h1 = XxHash.hashLong(item, h0);
     return queryInternal(h0, h1);
   }

   /**
    * Queries the filter with the provided double and returns whether the
    * value <em>might</em> have been seen previously. The filter's expected
    * False Positive Probability determines the chances of a true result being
    * a false positive. False negatives are never possible. Double values are
    * canonicalized (NaN and +/- infinity) before querying.
    * @param item an item with which to query the filter
    * @return The result of querying the filter with the given item
    */
   public boolean query(final double item) {
     // canonicalize all NaN & +/- infinity forms
     final long[] data = { Double.doubleToLongBits(item) };
     final long h0 = XxHash.hashLongArr(data, 0, 1, seed_);
     final long h1 = XxHash.hashLongArr(data, 0, 1, h0);
     return queryInternal(h0, h1);
   }

   /**
    * Queries the filter with the provided double and returns whether the
    * value <em>might</em> have been seen previously. The filter's expected
    * False Positive Probability determines the chances of a true result being
    * a false positive. False negatives are never possible.
    * The string is converted to a byte array using UTF8 encoding.
    *
    * <p>Note: this will not produce the same output hash values as the {@link #update(char[])}
    * method and will generally be a little slower depending on the complexity of the UTF8 encoding.
    * </p>
    *
    * @param item an item with which to query the filter
    * @return The result of querying the filter with the given item, or false if item is null
    */
   public boolean query(final String item) {
     if (item == null || item.isEmpty()) { return false; }
     final byte[] strBytes = item.getBytes(StandardCharsets.UTF_8);
     final long h0 = XxHash.hashByteArr(strBytes, 0, strBytes.length, seed_);
     final long h1 = XxHash.hashByteArr(strBytes, 0, strBytes.length, h0);
     return queryInternal(h0, h1);
   }

   /**
    * Queries the filter with the provided byte[] and returns whether the
    * array <em>might</em> have been seen previously. The filter's expected
    * False Positive Probability determines the chances of a true result being
    * a false positive. False negatives are never possible.
    * @param data an array with which to query the filter
    * @return The result of querying the filter with the given data, or false if data is null
    */
   public boolean query(final byte[] data) {
     if (data == null) { return false; }
     final long h0 = XxHash.hashByteArr(data, 0, data.length, seed_);
     final long h1 = XxHash.hashByteArr(data, 0, data.length, h0);
     return queryInternal(h0, h1);
   }

   /**
    * Queries the filter with the provided char[] and returns whether the
    * array <em>might</em> have been seen previously. The filter's expected
    * False Positive Probability determines the chances of a true result being
    * a false positive. False negatives are never possible.
    * @param data an array with which to query the filter
    * @return The result of querying the filter with the given data, or false if data is null
    */
   public boolean query(final char[] data) {
     if (data == null) { return false; }
     final long h0 = XxHash.hashCharArr(data, 0, data.length, seed_);
     final long h1 = XxHash.hashCharArr(data, 0, data.length, h0);
     return queryInternal(h0, h1);
   }

   /**
    * Queries the filter with the provided short[] and returns whether the
    * array <em>might</em> have been seen previously. The filter's expected
    * False Positive Probability determines the chances of a true result being
    * a false positive. False negatives are never possible.
    * @param data an array with which to query the filter
    * @return The result of querying the filter with the given data, or false if data is null
    */
   public boolean query(final short[] data) {
     if (data == null) { return false; }
     final long h0 = XxHash.hashShortArr(data, 0, data.length, seed_);
     final long h1 = XxHash.hashShortArr(data, 0, data.length, h0);
     return queryInternal(h0, h1);
   }

   /**
    * Queries the filter with the provided int[] and returns whether the
    * array <em>might</em> have been seen previously. The filter's expected
    * False Positive Probability determines the chances of a true result being
    * a false positive. False negatives are never possible.
    * @param data an array with which to query the filter
    * @return The result of querying the filter with the given data, or false if data is null
    */
   public boolean query(final int[] data) {
     if (data == null) { return false; }
     final long h0 = XxHash.hashIntArr(data, 0, data.length, seed_);
     final long h1 = XxHash.hashIntArr(data, 0, data.length, h0);
     return queryInternal(h0, h1);
   }

   /**
    * Queries the filter with the provided long[] and returns whether the
    * array <em>might</em> have been seen previously. The filter's expected
    * False Positive Probability determines the chances of a true result being
    * a false positive. False negatives are never possible.
    * @param data an array with which to query the filter
    * @return The result of querying the filter with the given data, or false if data is null
    */
   public boolean query(final long[] data) {
     if (data == null) { return false; }
     final long h0 = XxHash.hashLongArr(data, 0, data.length, seed_);
     final long h1 = XxHash.hashLongArr(data, 0, data.length, h0);
     return queryInternal(h0, h1);
   }

   /**
    * Queries the filter with the provided Memory and returns whether the
    * data <em>might</em> have been seen previously. The filter's expected
    * False Positive Probability determines the chances of a true result being
    * a false positive. False negatives are never possible.
    * @param mem a Memory array with which to query the filter
    * @return The result of querying the filter with the given Memory, or false if data is null
    */
   public boolean query(final Memory mem) {
     if (mem == null) { return false; }
     final long h0 = mem.xxHash64(0, mem.getCapacity(), seed_);
     final long h1 = mem.xxHash64(0, mem.getCapacity(), h0);
     return queryInternal(h0, h1);
   }

   // Internal method to query the filter given pre-computed hashes
   private boolean queryInternal(final long h0, final long h1) {
     final long numBits = bitArray_.getCapacity();
     for (int i = 1; i <= numHashes_; ++i) {
       final long hashIndex = ((h0 + i * h1) >>> 1) % numBits;
       // returns old value of bit
       if (!bitArray_.getBit(hashIndex)) {
         return false;
       }
     }
     return true;
   }

   // OTHER OPERATIONS
   /**
    * Unions two BloomFilters by applying a logical OR. The result will recognized
    * any values seen by either filter (as well as false positives).
    * @param other A BloomFilter to union with this one
    */
   public void union(final BloomFilter other) {
     if (other == null) { return; }
     if (!isCompatible(other)) {
       throw new SketchesArgumentException("Cannot union sketches with different seeds, hash functions, or sizes");
     }

     bitArray_.union(other.bitArray_);
   }

   /**
    * Intersects two BloomFilters by applying a logical AND. The result will recognize
    * only values seen by both filters (as well as false positives).
    * @param other A BloomFilter to union with this one
    */
   public void intersect(final BloomFilter other) {
     if (other == null) { return; }
     if (!isCompatible(other)) {
       throw new SketchesArgumentException("Cannot union sketches with different seeds, hash functions, or sizes");
     }

     bitArray_.intersect(other.bitArray_);
   }

   /**
    * Inverts all the bits of the BloomFilter. Approximately inverts the notion of set-membership.
    */
   public void invert() {
     bitArray_.invert();
   }

   /**
    * Helps identify if two BloomFilters may be unioned or intersected.
    * @param other A BloomFilter to check for compatibility with this one
    * @return True if the filters are compatible, otherwise false
    */
   public boolean isCompatible(final BloomFilter other) {
     if (other == null
         || seed_ != other.seed_
         || numHashes_ != other.numHashes_
         || bitArray_.getArrayLength() != other.bitArray_.getArrayLength()) {
           return false;
     }
     return true;
   }

   /**
    * Returns the length of this BloomFilter when serialized, in bytes
    * @return The length of this BloomFilter when serialized, in bytes
    */
   public long getSerializedSizeBytes() {
     long sizeBytes = 2L * Long.BYTES; // basic sketch info + baseSeed
     sizeBytes += bitArray_.getSerializedSizeBytes();
     return sizeBytes;
   }

   /**
    * Returns the serialized length of a non-empty BloomFilter of the given size, in bytes
    * @param numBits The number of bits of to use for size computation
    * @return The serialized length of a non-empty BloomFilter of the given size, in bytes
    */
   public static long getSerializedSize(final long numBits) {
     return (2L * Long.BYTES) + BitArray.getSerializedSizeBytes(numBits);
   }

 /*
  * A Bloom Filter's serialized image always uses 3 longs of preamble when empty,
  * otherwise 4 longs:
  *
  * <pre>
  * Long || Start Byte Adr:
  * Adr:
  *      ||       0        |    1   |    2   |    3   |    4   |    5   |    6   |    7   |
  *  0   || Preamble_Longs | SerVer | FamID  |  Flags |----Num Hashes---|-----Unused------|
  *
  *      ||       8        |    9   |   10   |   11   |   12   |   13   |   14   |   15   |
  *  1   ||---------------------------------Hash Seed-------------------------------------|
  *
  *      ||      16        |   17   |   18   |   19   |   20   |   21   |   22   |   23   |
  *  2   ||-------BitArray Length (in longs)----------|-----------Unused------------------|
  *
  *      ||      24        |   25   |   26   |   27   |   28   |   29   |   30   |   31   |
  *  3   ||---------------------------------NumBitsSet------------------------------------|
  *  </pre>
  *
  * The raw BitArray bits, if non-empty start at byte 24.
  */

   /**
    * Serializes the current BloomFilter to an array of bytes.
    *
    * <p>Note: Method throws if the serialized size exceeds <code>Integer.MAX_VALUE</code>.</p>
    * @return A serialized image of the current BloomFilter as byte[]
    */
   public byte[] toByteArray() {
     final long sizeBytes = getSerializedSizeBytes();
     if (sizeBytes > Integer.MAX_VALUE) {
       throw new SketchesStateException("Cannot serialize a BloomFilter of this size using toByteArray(); use toLongArray() instead.");
     }

     final byte[] bytes = new byte[(int) sizeBytes];

     if (wmem_ == null) {
       final WritableBuffer wbuf = WritableMemory.writableWrap(bytes).asWritableBuffer();

       final int numPreLongs = isEmpty() ? Family.BLOOMFILTER.getMinPreLongs() : Family.BLOOMFILTER.getMaxPreLongs();
       wbuf.putByte((byte) numPreLongs);
       wbuf.putByte((byte) SER_VER);
       wbuf.putByte((byte) Family.BLOOMFILTER.getID());
       wbuf.putByte((byte) (bitArray_.isEmpty() ? EMPTY_FLAG_MASK : 0));
       wbuf.putShort(numHashes_);
       wbuf.putShort((short) 0); // unused
       wbuf.putLong(seed_);

       ((HeapBitArray) bitArray_).writeToBuffer(wbuf);
     } else {
       wmem_.getByteArray(0, bytes, 0, (int) sizeBytes);
       if (isEmpty()) {
         bytes[FLAGS_BYTE] |= EMPTY_FLAG_MASK;
       }
     }
     return bytes;
   }

   /**
    * Serializes the current BloomFilter to an array of longs. Unlike {@link #toByteArray()},
    * this method can handle any size filter.
    *
    * @return A serialized image of the current BloomFilter as long[]
    */
   public long[] toLongArray() {
     final long sizeBytes = getSerializedSizeBytes();

     final long[] longs = new long[(int) (sizeBytes >> 3)];
     if (wmem_ == null) {
       final WritableBuffer wbuf = WritableMemory.writableWrap(longs).asWritableBuffer();

       final int numPreLongs = isEmpty() ? Family.BLOOMFILTER.getMinPreLongs() : Family.BLOOMFILTER.getMaxPreLongs();
       wbuf.putByte((byte) numPreLongs);
       wbuf.putByte((byte) SER_VER); // to do: add constant
       wbuf.putByte((byte) Family.BLOOMFILTER.getID());
       wbuf.putByte((byte) (bitArray_.isEmpty() ? EMPTY_FLAG_MASK : 0));
       wbuf.putShort(numHashes_);
       wbuf.putShort((short) 0); // unused
       wbuf.putLong(seed_);

       ((HeapBitArray) bitArray_).writeToBuffer(wbuf);
     } else {
       wmem_.getLongArray(0, longs, 0, (int) (sizeBytes >>> 3));
       if (isEmpty()) {
         longs[0] |= (EMPTY_FLAG_MASK << (FLAGS_BYTE << 3));
       }
     }
     return longs;
   }

   // Throws an exception with the provided message if the given condition is false
   private static void checkArgument(final boolean condition, final String message) {
     if (condition) { throw new SketchesArgumentException(message); }
   }

   @Override
   public String toString() {
     final StringBuilder sb = new StringBuilder();

     sb.append(LS);
     final String thisSimpleName = this.getClass().getSimpleName();
     sb.append("### ").append(thisSimpleName).append(" SUMMARY: ").append(LS);
     sb.append("   numBits      : ").append(bitArray_.getCapacity()).append(LS);
     sb.append("   numHashes    : ").append(numHashes_).append(LS);
     sb.append("   seed         : ").append(seed_).append(LS);
     sb.append("   bitsUsed     : ").append(bitArray_.getNumBitsSet()).append(LS);
     sb.append("   fill %       : ").append(getFillPercentage()).append(LS);
     sb.append("### END SKETCH SUMMARY").append(LS);

     return sb.toString();
   }
 }