Merge pull request #351 from apache/MinorUpdates

Minor updates
diff --git a/src/main/java/org/apache/datasketches/BoundsOnRatiosInTupleSketchedSets.java b/src/main/java/org/apache/datasketches/BoundsOnRatiosInTupleSketchedSets.java
new file mode 100644
index 0000000..d74170d
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/BoundsOnRatiosInTupleSketchedSets.java
@@ -0,0 +1,202 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches;
+
+import static org.apache.datasketches.Util.LONG_MAX_VALUE_AS_DOUBLE;
+
+import org.apache.datasketches.tuple.Sketch;
+import org.apache.datasketches.tuple.Summary;
+
+/**
+ * This class is used to compute the bounds on the estimate of the ratio <i>B / A</i>, where:
+ * <ul>
+ * <li><i>A</i> is a Tuple Sketch of population <i>PopA</i>.</li>
+ * <li><i>B</i> is a Tuple or Theta Sketch of population <i>PopB</i> that is a subset of <i>A</i>,
+ * obtained by an intersection of <i>A</i> with some other Tuple or Theta Sketch <i>C</i>,
+ * which acts like a predicate or selection clause.</li>
+ * <li>The estimate of the ratio <i>PopB/PopA</i> is
+ * BoundsOnRatiosInTupleSketchedSets.getEstimateOfBoverA(<i>A, B</i>).</li>
+ * <li>The Upper Bound estimate on the ratio PopB/PopA is
+ * BoundsOnRatiosInTupleSketchedSets.getUpperBoundForBoverA(<i>A, B</i>).</li>
+ * <li>The Lower Bound estimate on the ratio PopB/PopA is
+ * BoundsOnRatiosInTupleSketchedSets.getLowerBoundForBoverA(<i>A, B</i>).</li>
+ * </ul>
+ * Note: The theta of <i>B</i> cannot be greater than the theta of <i>A</i>.
+ * If <i>B</i> is formed as an intersection of <i>A</i> and some other set <i>C</i>,
+ * then the theta of <i>B</i> is guaranteed to be less than or equal to the theta of <i>A</i>.
+ *
+ * @author Kevin Lang
+ * @author Lee Rhodes
+ * @author David Cromberge
+ */
+public final class BoundsOnRatiosInTupleSketchedSets {
+
+  private BoundsOnRatiosInTupleSketchedSets() {} // private: the class only exposes static methods
+
+  /**
+   * Gets the approximate lower bound for B over A based on a 95% confidence interval.
+   * @param sketchA the Tuple sketch A with summary type <i>S</i>; its theta must not be less than the theta of B
+   * @param sketchB the Tuple sketch B with summary type <i>S</i>
+   * @param <S> Summary
+   * @return the approximate lower bound for B over A, or 0 if the entry count taken from A is not positive
+   */
+  public static <S extends Summary> double getLowerBoundForBoverA(
+      final Sketch<S> sketchA,
+      final Sketch<S> sketchB) {
+    final long thetaLongA = sketchA.getThetaLong();
+    final long thetaLongB = sketchB.getThetaLong();
+    checkThetas(thetaLongA, thetaLongB); // throws SketchesArgumentException if thetaLongB > thetaLongA
+
+    final int countB = sketchB.getRetainedEntries();
+    final int countA = thetaLongB == thetaLongA
+        ? sketchA.getRetainedEntries() // equal thetas: use the retained count of A directly
+        : sketchA.getCountLessThanThetaLong(thetaLongB); // else count hashes of A below the theta of B
+
+    if (countA <= 0) { return 0; }
+    final double f = thetaLongB / LONG_MAX_VALUE_AS_DOUBLE; // theta of B divided by LONG_MAX_VALUE_AS_DOUBLE
+    return BoundsOnRatiosInSampledSets.getLowerBoundForBoverA(countA, countB, f);
+  }
+
+  /**
+   * Gets the approximate lower bound for B over A based on a 95% confidence interval.
+   * @param sketchA the Tuple sketch A with summary type <i>S</i>; its theta must not be less than the theta of B
+   * @param sketchB the Theta sketch B
+   * @param <S> Summary
+   * @return the approximate lower bound for B over A, or 0 if the entry count taken from A is not positive
+   */
+  public static <S extends Summary> double getLowerBoundForBoverA(
+      final Sketch<S> sketchA,
+      final org.apache.datasketches.theta.Sketch sketchB) {
+    final long thetaLongA = sketchA.getThetaLong();
+    final long thetaLongB = sketchB.getThetaLong();
+    checkThetas(thetaLongA, thetaLongB); // throws SketchesArgumentException if thetaLongB > thetaLongA
+
+    final int countB = sketchB.getRetainedEntries(); // NOTE(review): siblings call getRetainedEntries(true) - confirm same
+    final int countA = thetaLongB == thetaLongA
+            ? sketchA.getRetainedEntries() // equal thetas: use the retained count of A directly
+            : sketchA.getCountLessThanThetaLong(thetaLongB); // else count hashes of A below the theta of B
+
+    if (countA <= 0) { return 0; }
+    final double f = thetaLongB / LONG_MAX_VALUE_AS_DOUBLE; // theta of B divided by LONG_MAX_VALUE_AS_DOUBLE
+    return BoundsOnRatiosInSampledSets.getLowerBoundForBoverA(countA, countB, f);
+  }
+
+  /**
+   * Gets the approximate upper bound for B over A based on a 95% confidence interval.
+   * @param sketchA the Tuple sketch A with summary type <i>S</i>; its theta must not be less than the theta of B
+   * @param sketchB the Tuple sketch B with summary type <i>S</i>
+   * @param <S> Summary
+   * @return the approximate upper bound for B over A, or 1.0 if the entry count taken from A is not positive
+   */
+  public static <S extends Summary> double getUpperBoundForBoverA(
+      final Sketch<S> sketchA,
+      final Sketch<S> sketchB) {
+    final long thetaLongA = sketchA.getThetaLong();
+    final long thetaLongB = sketchB.getThetaLong();
+    checkThetas(thetaLongA, thetaLongB); // throws SketchesArgumentException if thetaLongB > thetaLongA
+
+    final int countB = sketchB.getRetainedEntries();
+    final int countA = thetaLongB == thetaLongA
+            ? sketchA.getRetainedEntries() // equal thetas: use the retained count of A directly
+            : sketchA.getCountLessThanThetaLong(thetaLongB); // else count hashes of A below the theta of B
+
+    if (countA <= 0) { return 1.0; }
+    final double f = thetaLongB / LONG_MAX_VALUE_AS_DOUBLE; // theta of B divided by LONG_MAX_VALUE_AS_DOUBLE
+    return BoundsOnRatiosInSampledSets.getUpperBoundForBoverA(countA, countB, f);
+  }
+
+  /**
+   * Gets the approximate upper bound for B over A based on a 95% confidence interval.
+   * @param sketchA the Tuple sketch A with summary type <i>S</i>; its theta must not be less than the theta of B
+   * @param sketchB the Theta sketch B
+   * @param <S> Summary
+   * @return the approximate upper bound for B over A, or 1.0 if the entry count taken from A is not positive
+   */
+  public static <S extends Summary> double getUpperBoundForBoverA(
+      final Sketch<S> sketchA,
+      final org.apache.datasketches.theta.Sketch sketchB) {
+    final long thetaLongA = sketchA.getThetaLong();
+    final long thetaLongB = sketchB.getThetaLong();
+    checkThetas(thetaLongA, thetaLongB); // throws SketchesArgumentException if thetaLongB > thetaLongA
+
+    final int countB = sketchB.getRetainedEntries(true);
+    final int countA = thetaLongB == thetaLongA
+        ? sketchA.getRetainedEntries() // equal thetas: use the retained count of A directly
+        : sketchA.getCountLessThanThetaLong(thetaLongB); // else count hashes of A below the theta of B
+
+    if (countA <= 0) { return 1.0; }
+    final double f = thetaLongB / LONG_MAX_VALUE_AS_DOUBLE; // theta of B divided by LONG_MAX_VALUE_AS_DOUBLE
+    return BoundsOnRatiosInSampledSets.getUpperBoundForBoverA(countA, countB, f);
+  }
+
+  /**
+   * Gets the estimate for B over A, computed as the ratio of the two entry counts.
+   * @param sketchA the Tuple sketch A with summary type <i>S</i>; its theta must not be less than the theta of B
+   * @param sketchB the Tuple sketch B with summary type <i>S</i>
+   * @param <S> Summary
+   * @return the estimate for B over A, or 0.5 if the entry count taken from A is not positive
+   */
+  public static <S extends Summary> double getEstimateOfBoverA(
+      final Sketch<S> sketchA,
+      final Sketch<S> sketchB) {
+    final long thetaLongA = sketchA.getThetaLong();
+    final long thetaLongB = sketchB.getThetaLong();
+    checkThetas(thetaLongA, thetaLongB); // throws SketchesArgumentException if thetaLongB > thetaLongA
+
+    final int countB = sketchB.getRetainedEntries();
+    final int countA = thetaLongB == thetaLongA
+        ? sketchA.getRetainedEntries() // equal thetas: use the retained count of A directly
+        : sketchA.getCountLessThanThetaLong(thetaLongB); // else count hashes of A below the theta of B
+
+    if (countA <= 0) { return 0.5; } // also avoids dividing by zero below
+
+    return (double) countB / (double) countA;
+  }
+
+  /**
+   * Gets the estimate for B over A, computed as the ratio of the two entry counts.
+   * @param sketchA the Tuple sketch A with summary type <i>S</i>; its theta must not be less than the theta of B
+   * @param sketchB the Theta sketch B
+   * @param <S> Summary
+   * @return the estimate for B over A, or 0.5 if the entry count taken from A is not positive
+   */
+  public static <S extends Summary> double getEstimateOfBoverA(
+      final Sketch<S> sketchA,
+      final org.apache.datasketches.theta.Sketch sketchB) {
+    final long thetaLongA = sketchA.getThetaLong();
+    final long thetaLongB = sketchB.getThetaLong();
+    checkThetas(thetaLongA, thetaLongB); // throws SketchesArgumentException if thetaLongB > thetaLongA
+
+    final int countB = sketchB.getRetainedEntries(true);
+    final int countA = thetaLongB == thetaLongA
+            ? sketchA.getRetainedEntries() // equal thetas: use the retained count of A directly
+            : sketchA.getCountLessThanThetaLong(thetaLongB); // else count hashes of A below the theta of B
+
+    if (countA <= 0) { return 0.5; } // also avoids dividing by zero below
+
+    return (double) countB / (double) countA;
+  }
+
+  static void checkThetas(final long thetaLongA, final long thetaLongB) { // validates thetaLongB <= thetaLongA
+    if (thetaLongB > thetaLongA) {
+      throw new SketchesArgumentException("ThetaLongB cannot be > ThetaLongA.");
+    }
+  }
+}
diff --git a/src/main/java/org/apache/datasketches/tuple/CompactSketch.java b/src/main/java/org/apache/datasketches/tuple/CompactSketch.java
index 07d350f..9a76587 100644
--- a/src/main/java/org/apache/datasketches/tuple/CompactSketch.java
+++ b/src/main/java/org/apache/datasketches/tuple/CompactSketch.java
@@ -19,6 +19,8 @@
 
 package org.apache.datasketches.tuple;
 
+import static org.apache.datasketches.HashOperations.count;
+
 import java.lang.reflect.Array;
 import java.nio.ByteOrder;
 
@@ -79,19 +81,19 @@
     SerializerDeserializer
       .validateType(mem.getByte(offset++), SerializerDeserializer.SketchType.CompactSketch);
     final byte flags = mem.getByte(offset++);
-    final boolean isBigEndian = (flags & (1 << Flags.IS_BIG_ENDIAN.ordinal())) > 0;
+    final boolean isBigEndian = (flags & 1 << Flags.IS_BIG_ENDIAN.ordinal()) > 0;
     if (isBigEndian ^ ByteOrder.nativeOrder().equals(ByteOrder.BIG_ENDIAN)) {
       throw new SketchesArgumentException("Byte order mismatch");
     }
-    empty_ = (flags & (1 << Flags.IS_EMPTY.ordinal())) > 0;
-    final boolean isThetaIncluded = (flags & (1 << Flags.IS_THETA_INCLUDED.ordinal())) > 0;
+    empty_ = (flags & 1 << Flags.IS_EMPTY.ordinal()) > 0;
+    final boolean isThetaIncluded = (flags & 1 << Flags.IS_THETA_INCLUDED.ordinal()) > 0;
     if (isThetaIncluded) {
       thetaLong_ = mem.getLong(offset);
       offset += Long.BYTES;
     } else {
       thetaLong_ = Long.MAX_VALUE;
     }
-    final boolean hasEntries = (flags & (1 << Flags.HAS_ENTRIES.ordinal())) > 0;
+    final boolean hasEntries = (flags & 1 << Flags.HAS_ENTRIES.ordinal()) > 0;
     if (hasEntries) {
       int classNameLength = 0;
       if (version == serialVersionWithSummaryClassNameUID) {
@@ -139,6 +141,11 @@
     return hashArr_ == null ? 0 : hashArr_.length;
   }
 
+  @Override // counts this sketch's hash values below thetaLong via HashOperations.count
+  public int getCountLessThanThetaLong(final long thetaLong) {
+    return hashArr_ == null ? 0 : count(hashArr_, thetaLong); // this class treats hashArr_ as nullable: no array, no hashes
+  }
+
   // Layout of first 8 bytes:
   // Long || Start Byte Adr:
   // Adr:
@@ -171,7 +178,7 @@
     if (count > 0) {
       sizeBytes +=
         + Integer.BYTES // count
-        + (Long.BYTES * count) + summariesBytesLength;
+        + Long.BYTES * count + summariesBytesLength;
     }
     final byte[] bytes = new byte[sizeBytes];
     int offset = 0;
diff --git a/src/main/java/org/apache/datasketches/tuple/JaccardSimilarity.java b/src/main/java/org/apache/datasketches/tuple/JaccardSimilarity.java
new file mode 100644
index 0000000..1567071
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/tuple/JaccardSimilarity.java
@@ -0,0 +1,371 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.tuple;
+
+import static java.lang.Math.max;
+import static java.lang.Math.min;
+import static org.apache.datasketches.BoundsOnRatiosInTupleSketchedSets.getEstimateOfBoverA;
+import static org.apache.datasketches.BoundsOnRatiosInTupleSketchedSets.getLowerBoundForBoverA;
+import static org.apache.datasketches.BoundsOnRatiosInTupleSketchedSets.getUpperBoundForBoverA;
+import static org.apache.datasketches.Util.MAX_LG_NOM_LONGS;
+import static org.apache.datasketches.Util.MIN_LG_NOM_LONGS;
+import static org.apache.datasketches.Util.ceilingPowerOf2;
+
+import org.apache.datasketches.SketchesArgumentException;
+
+/**
+ * Jaccard similarity of two Tuple Sketches, or alternatively, of a Tuple and Theta Sketch.
+ *
+ * <p>Note: only retained hash values are compared, and the Tuple summary values are not accounted for in the
+ * similarity measure.</p>
+ *
+ * @author Lee Rhodes
+ * @author David Cromberge
+ */
+public final class JaccardSimilarity {
+  private static final double[] ZEROS = {0.0, 0.0, 0.0}; // {LB, Estimate, UB}; handed to callers via clone()
+  private static final double[] ONES = {1.0, 1.0, 1.0}; // same {LB, Estimate, UB} layout; handed out via clone()
+
+  /**
+   * Computes the Jaccard similarity index with upper and lower bounds. The Jaccard similarity index
+   * <i>J(A,B) = (A ^ B)/(A U B)</i> is used to measure how similar the two sketches are to each
+   * other. If J = 1.0, the sketches are considered equal. If J = 0, the two sketches are
+   * disjoint from each other. A Jaccard of .95 means the overlap between the two
+   * populations is 95% of the union of the two populations.
+   *
+   * <p>Note: For very large pairs of sketches, where the configured nominal entries of the sketches
+   * are 2^25 or 2^26, this method may produce unpredictable results.
+   *
+   * @param sketchA The first argument, a Tuple sketch with summary type <i>S</i>
+   * @param sketchB The second argument, a Tuple sketch with summary type <i>S</i>
+   * @param summarySetOps instance of SummarySetOperations used to unify or intersect summaries.
+   * @param <S> Summary
+   * @return a double array {LowerBound, Estimate, UpperBound} of the Jaccard index; {0,0,0} if either sketch is
+   * null or only one is empty, {1,1,1} if both are empty. Bounds span 95.4% confidence (+/- 2 std deviations).
+   */
+  public static <S extends Summary> double[] jaccard(
+      final Sketch<S> sketchA,
+      final Sketch<S> sketchB,
+      final SummarySetOperations<S> summarySetOps) {
+    //Corner case checks
+    if (sketchA == null || sketchB == null) { return ZEROS.clone(); }
+    if (sketchA.isEmpty() && sketchB.isEmpty()) { return ONES.clone(); } // both empty: reported as identical
+    if (sketchA.isEmpty() || sketchB.isEmpty()) { return ZEROS.clone(); }
+
+    final int countA = sketchA.getRetainedEntries();
+    final int countB = sketchB.getRetainedEntries();
+
+    //Create the Union
+    final int minK = 1 << MIN_LG_NOM_LONGS;
+    final int maxK = 1 << MAX_LG_NOM_LONGS;
+    final int newK = max(min(ceilingPowerOf2(countA + countB), maxK), minK); // clamp to [minK, maxK]
+    final Union<S> union = new Union<>(newK, summarySetOps);
+    union.union(sketchA);
+    union.union(sketchB);
+
+    final Sketch<S> unionAB = union.getResult();
+    final long thetaLongUAB = unionAB.getThetaLong();
+    final long thetaLongA = sketchA.getThetaLong();
+    final long thetaLongB = sketchB.getThetaLong();
+    final int countUAB = unionAB.getRetainedEntries();
+
+    //Check for identical data: the union has the same count and theta as each input
+    if (countUAB == countA && countUAB == countB
+            && thetaLongUAB == thetaLongA && thetaLongUAB == thetaLongB) {
+      return ONES.clone();
+    }
+
+    //Create the Intersection
+    final Intersection<S> inter = new Intersection<>(summarySetOps);
+    inter.intersect(sketchA);
+    inter.intersect(sketchB);
+    inter.intersect(unionAB); //ensures that intersection is a subset of the union
+    final Sketch<S> interABU = inter.getResult();
+
+    final double lb = getLowerBoundForBoverA(unionAB, interABU); // ratio of intersection (B) over union (A)
+    final double est = getEstimateOfBoverA(unionAB, interABU);
+    final double ub = getUpperBoundForBoverA(unionAB, interABU);
+    return new double[] {lb, est, ub};
+  }
+
+  /**
+   * Computes the Jaccard similarity index with upper and lower bounds. The Jaccard similarity index
+   * <i>J(A,B) = (A ^ B)/(A U B)</i> is used to measure how similar the two sketches are to each
+   * other. If J = 1.0, the sketches are considered equal. If J = 0, the two sketches are
+   * disjoint from each other. A Jaccard of .95 means the overlap between the two
+   * populations is 95% of the union of the two populations.
+   *
+   * <p>Note: For very large pairs of sketches, where the configured nominal entries of the sketches
+   * are 2^25 or 2^26, this method may produce unpredictable results.
+   *
+   * @param sketchA The first argument, a Tuple sketch with summary type <i>S</i>
+   * @param sketchB The second argument, a Theta sketch
+   * @param summary the given proxy summary for the theta sketch, which doesn't have one.
+   * This must not be null; a null summary causes a SketchesArgumentException.
+   * @param summarySetOps instance of SummarySetOperations used to unify or intersect summaries.
+   * @param <S> Summary
+   * @return a double array {LowerBound, Estimate, UpperBound} of the Jaccard index; {0,0,0} if either sketch is
+   * null or only one is empty, {1,1,1} if both are empty. Bounds span 95.4% confidence (+/- 2 std deviations).
+   */
+  public static <S extends Summary> double[] jaccard(
+      final Sketch<S> sketchA,
+      final org.apache.datasketches.theta.Sketch sketchB,
+      final S summary, final SummarySetOperations<S> summarySetOps) {
+    // Null case checks: the summary is validated before the sketches are examined
+    if (summary == null) {
+      throw new SketchesArgumentException("Summary cannot be null."); }
+
+    //Corner case checks
+    if (sketchA == null || sketchB == null) { return ZEROS.clone(); }
+    if (sketchA.isEmpty() && sketchB.isEmpty()) { return ONES.clone(); } // both empty: reported as identical
+    if (sketchA.isEmpty() || sketchB.isEmpty()) { return ZEROS.clone(); }
+
+    final int countA = sketchA.getRetainedEntries();
+    final int countB = sketchB.getRetainedEntries(true);
+
+    //Create the Union
+    final int minK = 1 << MIN_LG_NOM_LONGS;
+    final int maxK = 1 << MAX_LG_NOM_LONGS;
+    final int newK = max(min(ceilingPowerOf2(countA + countB), maxK), minK); // clamp to [minK, maxK]
+    final Union<S> union = new Union<>(newK, summarySetOps);
+    union.union(sketchA);
+    union.union(sketchB, summary);
+
+    final Sketch<S> unionAB = union.getResult();
+    final long thetaLongUAB = unionAB.getThetaLong();
+    final long thetaLongA = sketchA.getThetaLong();
+    final long thetaLongB = sketchB.getThetaLong();
+    final int countUAB = unionAB.getRetainedEntries();
+
+    //Check for identical data: the union has the same count and theta as each input
+    if (countUAB == countA && countUAB == countB
+            && thetaLongUAB == thetaLongA && thetaLongUAB == thetaLongB) {
+      return ONES.clone();
+    }
+
+    //Create the Intersection
+    final Intersection<S> inter = new Intersection<>(summarySetOps);
+    inter.intersect(sketchA);
+    inter.intersect(sketchB, summary);
+    inter.intersect(unionAB); //ensures that intersection is a subset of the union
+    final Sketch<S> interABU = inter.getResult();
+
+    final double lb = getLowerBoundForBoverA(unionAB, interABU); // ratio of intersection (B) over union (A)
+    final double est = getEstimateOfBoverA(unionAB, interABU);
+    final double ub = getUpperBoundForBoverA(unionAB, interABU);
+    return new double[] {lb, est, ub};
+  }
+
+  /**
+   * Returns true if the two given sketches have exactly the same hash values and the same
+   * theta values. Thus, they are equivalent. Summary values are not compared by this method.
+   * @param sketchA The first argument, a Tuple sketch with summary type <i>S</i>
+   * @param sketchB The second argument, a Tuple sketch with summary type <i>S</i>
+   * @param summarySetOps instance of SummarySetOperations used to unify or intersect summaries.
+   * @param <S> Summary
+   * @return true if the two given sketches have exactly the same hash values and the same theta values
+   * (or are both empty); false if either sketch is null or only one of them is empty.
+   */
+  public static <S extends Summary> boolean exactlyEqual(
+      final Sketch<S> sketchA,
+      final Sketch<S> sketchB,
+      final SummarySetOperations<S> summarySetOps) {
+    //Corner case checks
+    if (sketchA == null || sketchB == null) { return false; }
+    if (sketchA == sketchB) { return true; } // same instance: no need to build a union
+    if (sketchA.isEmpty() && sketchB.isEmpty()) { return true; }
+    if (sketchA.isEmpty() || sketchB.isEmpty()) { return false; }
+
+    final int countA = sketchA.getRetainedEntries();
+    final int countB = sketchB.getRetainedEntries();
+
+    //Create the Union. NOTE(review): K is not clamped to [minK, maxK] as jaccard() does - confirm intended
+    final Union<S> union = new Union<>(ceilingPowerOf2(countA + countB), summarySetOps);
+    union.union(sketchA);
+    union.union(sketchB);
+    final Sketch<S> unionAB = union.getResult();
+    final long thetaLongUAB = unionAB.getThetaLong();
+    final long thetaLongA = sketchA.getThetaLong();
+    final long thetaLongB = sketchB.getThetaLong();
+    final int countUAB = unionAB.getRetainedEntries();
+
+    //Check for identical counts and thetas: equality is inferred from the union matching both inputs
+    if (countUAB == countA && countUAB == countB
+            && thetaLongUAB == thetaLongA && thetaLongUAB == thetaLongB) {
+      return true;
+    }
+    return false;
+  }
+
+  /**
+   * Returns true if the two given sketches have exactly the same hash values and the same
+   * theta values. Thus, they are equivalent. Summary values are not compared by this method.
+   * @param sketchA The first argument, a Tuple sketch with summary type <i>S</i>
+   * @param sketchB The second argument, a Theta sketch
+   * @param summary the given proxy summary for the theta sketch, which doesn't have one.
+   * This must not be null; a null summary causes a SketchesArgumentException.
+   * @param summarySetOps instance of SummarySetOperations used to unify or intersect summaries.
+   * @param <S> Summary
+   * @return true if the two given sketches have exactly the same hash values and the same theta values
+   * (or are both empty); false if either sketch is null or only one of them is empty.
+   */
+  public static <S extends Summary> boolean exactlyEqual(
+      final Sketch<S> sketchA,
+      final org.apache.datasketches.theta.Sketch sketchB,
+      final S summary, final SummarySetOperations<S> summarySetOps) {
+    // Null case checks: the summary is validated before the sketches are examined
+    if (summary == null) {
+      throw new SketchesArgumentException("Summary cannot be null."); }
+
+    //Corner case checks
+    if (sketchA == null || sketchB == null) { return false; }
+    if (sketchA.isEmpty() && sketchB.isEmpty()) { return true; }
+    if (sketchA.isEmpty() || sketchB.isEmpty()) { return false; }
+
+    final int countA = sketchA.getRetainedEntries();
+    final int countB = sketchB.getRetainedEntries(true);
+
+    //Create the Union. NOTE(review): K is not clamped to [minK, maxK] as jaccard() does - confirm intended
+    final Union<S> union = new Union<>(ceilingPowerOf2(countA + countB), summarySetOps);
+    union.union(sketchA);
+    union.union(sketchB, summary);
+    final Sketch<S> unionAB = union.getResult();
+    final long thetaLongUAB = unionAB.getThetaLong();
+    final long thetaLongA = sketchA.getThetaLong();
+    final long thetaLongB = sketchB.getThetaLong();
+    final int countUAB = unionAB.getRetainedEntries();
+
+    //Check for identical counts and thetas: equality is inferred from the union matching both inputs
+    if (countUAB == countA && countUAB == countB
+        && thetaLongUAB == thetaLongA && thetaLongUAB == thetaLongB) {
+      return true;
+    }
+    return false;
+  }
+
+  /**
+   * Tests similarity of a measured Sketch against an expected Sketch.
+   * Computes the lower bound of the Jaccard index <i>J<sub>LB</sub></i> of the measured and
+   * expected sketches.
+   * If <i>J<sub>LB</sub> &ge; threshold</i>, then the sketches are considered to be
+   * similar with a confidence of 97.7%.
+   *
+   * @param measured a Tuple sketch with summary type <i>S</i> to be tested
+   * @param expected the reference Tuple sketch with summary type <i>S</i> that is considered to be correct.
+   * @param summarySetOps instance of SummarySetOperations used to unify or intersect summaries.
+   * @param threshold a real value between zero and one.
+   * @param <S> Summary
+   * @return true if the lower bound of the Jaccard index is greater than or equal to the given threshold,
+   * i.e. the two sketches are considered similar with at least 97.7% confidence.
+   */
+  public static <S extends Summary> boolean similarityTest(
+      final Sketch<S> measured, final Sketch<S> expected,
+      final SummarySetOperations<S> summarySetOps,
+      final double threshold) {
+    //index 0: the lower bound
+    //index 1: the mean estimate
+    //index 2: the upper bound
+    final double jRatioLB = jaccard(measured, expected, summarySetOps)[0]; //choosing the lower bound
+    return jRatioLB >= threshold;
+  }
+
+  /**
+   * Tests similarity of a measured Sketch against an expected Sketch.
+   * Computes the lower bound of the Jaccard index <i>J<sub>LB</sub></i> of the measured and
+   * expected sketches.
+   * If <i>J<sub>LB</sub> &ge; threshold</i>, then the sketches are considered to be
+   * similar with a confidence of 97.7%.
+   *
+   * @param measured a Tuple sketch with summary type <i>S</i> to be tested
+   * @param expected the reference Theta sketch that is considered to be correct.
+   * @param summary the given proxy summary for the theta sketch, which doesn't have one.
+   * This must not be null; jaccard() rejects a null summary with a SketchesArgumentException.
+   * @param summarySetOps instance of SummarySetOperations used to unify or intersect summaries.
+   * @param threshold a real value between zero and one.
+   * @param <S> Summary
+   * @return true if the lower bound of the Jaccard index is greater than or equal to the given threshold,
+   * i.e. the two sketches are considered similar with at least 97.7% confidence.
+   */
+  public static <S extends Summary> boolean similarityTest(
+      final Sketch<S> measured, final org.apache.datasketches.theta.Sketch expected,
+      final S summary, final SummarySetOperations<S> summarySetOps,
+      final double threshold) {
+    //index 0: the lower bound
+    //index 1: the mean estimate
+    //index 2: the upper bound
+    final double jRatioLB = jaccard(measured, expected, summary, summarySetOps)[0]; //choosing the lower bound
+    return jRatioLB >= threshold;
+  }
+
+  /**
+   * Tests dissimilarity of a measured Sketch against an expected Sketch.
+   * Computes the upper bound of the Jaccard index <i>J<sub>UB</sub></i> of the measured and
+   * expected sketches.
+   * If <i>J<sub>UB</sub> &le; threshold</i>, then the sketches are considered to be
+   * dissimilar with a confidence of 97.7%.
+   *
+   * @param measured a Tuple sketch with summary type <i>S</i> to be tested
+   * @param expected the reference Tuple sketch with summary type <i>S</i> that is considered to be correct.
+   * @param summarySetOps instance of SummarySetOperations used to unify or intersect summaries.
+   * @param threshold a real value between zero and one.
+   * @param <S> Summary
+   * @return true if the upper bound of the Jaccard index is less than or equal to the given threshold,
+   * i.e. the two sketches are considered dissimilar with at least 97.7% confidence.
+   */
+  public static <S extends Summary> boolean dissimilarityTest(
+      final Sketch<S> measured, final Sketch<S> expected,
+      final SummarySetOperations<S> summarySetOps,
+      final double threshold) {
+    //index 0: the lower bound
+    //index 1: the mean estimate
+    //index 2: the upper bound
+    final double jRatioUB = jaccard(measured, expected, summarySetOps)[2]; //choosing the upper bound
+    return jRatioUB <= threshold;
+  }
+
+  /**
+   * Tests dissimilarity of a measured Sketch against an expected Sketch.
+   * Computes the upper bound of the Jaccard index <i>J<sub>UB</sub></i> of the measured and
+   * expected sketches.
+   * If <i>J<sub>UB</sub> &le; threshold</i>, then the sketches are considered to be
+   * dissimilar with a confidence of 97.7%.
+   *
+   * @param measured a Tuple sketch with summary type <i>S</i> to be tested
+   * @param expected the reference Theta sketch that is considered to be correct.
+   * @param summary the given proxy summary for the theta sketch, which doesn't have one.
+   * This must not be null; jaccard() rejects a null summary with a SketchesArgumentException.
+   * @param summarySetOps instance of SummarySetOperations used to unify or intersect summaries.
+   * @param threshold a real value between zero and one.
+   * @param <S> Summary
+   * @return true if the upper bound of the Jaccard index is less than or equal to the given threshold,
+   * i.e. the two sketches are considered dissimilar with at least 97.7% confidence.
+   */
+  public static <S extends Summary> boolean dissimilarityTest(
+      final Sketch<S> measured, final org.apache.datasketches.theta.Sketch expected,
+      final S summary, final SummarySetOperations<S> summarySetOps,
+      final double threshold) {
+    //index 0: the lower bound
+    //index 1: the mean estimate
+    //index 2: the upper bound
+    final double jRatioUB = jaccard(measured, expected, summary, summarySetOps)[2]; //choosing the upper bound
+    return jRatioUB <= threshold;
+  }
+
+}
diff --git a/src/main/java/org/apache/datasketches/tuple/QuickSelectSketch.java b/src/main/java/org/apache/datasketches/tuple/QuickSelectSketch.java
index 99166e2..fd56b06 100644
--- a/src/main/java/org/apache/datasketches/tuple/QuickSelectSketch.java
+++ b/src/main/java/org/apache/datasketches/tuple/QuickSelectSketch.java
@@ -19,6 +19,7 @@
 
 package org.apache.datasketches.tuple;
 
+import static org.apache.datasketches.HashOperations.count;
 import static org.apache.datasketches.Util.REBUILD_THRESHOLD;
 import static org.apache.datasketches.Util.RESIZE_THRESHOLD;
 import static org.apache.datasketches.Util.ceilingPowerOf2;
@@ -158,7 +159,7 @@
     SerializerDeserializer.validateType(mem.getByte(offset++),
         SerializerDeserializer.SketchType.QuickSelectSketch);
     final byte flags = mem.getByte(offset++);
-    final boolean isBigEndian = (flags & (1 << Flags.IS_BIG_ENDIAN.ordinal())) > 0;
+    final boolean isBigEndian = (flags & 1 << Flags.IS_BIG_ENDIAN.ordinal()) > 0;
     if (isBigEndian ^ ByteOrder.nativeOrder().equals(ByteOrder.BIG_ENDIAN)) {
       throw new SketchesArgumentException("Endian byte order mismatch");
     }
@@ -166,13 +167,13 @@
     lgCurrentCapacity_ = mem.getByte(offset++);
     lgResizeFactor_ = mem.getByte(offset++);
 
-    final boolean isInSamplingMode = (flags & (1 << Flags.IS_IN_SAMPLING_MODE.ordinal())) > 0;
+    final boolean isInSamplingMode = (flags & 1 << Flags.IS_IN_SAMPLING_MODE.ordinal()) > 0;
     samplingProbability_ = isInSamplingMode ? mem.getFloat(offset) : 1f;
     if (isInSamplingMode) {
       offset += Float.BYTES;
     }
 
-    final boolean isThetaIncluded = (flags & (1 << Flags.IS_THETA_INCLUDED.ordinal())) > 0;
+    final boolean isThetaIncluded = (flags & 1 << Flags.IS_THETA_INCLUDED.ordinal()) > 0;
     if (isThetaIncluded) {
       thetaLong_ = mem.getLong(offset);
       offset += Long.BYTES;
@@ -181,7 +182,7 @@
     }
 
     int count = 0;
-    final boolean hasEntries = (flags & (1 << Flags.HAS_ENTRIES.ordinal())) > 0;
+    final boolean hasEntries = (flags & 1 << Flags.HAS_ENTRIES.ordinal()) > 0;
     if (hasEntries) {
       count = mem.getInt(offset);
       offset += Integer.BYTES;
@@ -197,7 +198,7 @@
       offset += summaryResult.getSize();
       insert(hash, summary);
     }
-    empty_ = (flags & (1 << Flags.IS_EMPTY.ordinal())) > 0;
+    empty_ = (flags & 1 << Flags.IS_EMPTY.ordinal()) > 0;
     setRebuildThreshold();
   }
 
@@ -210,6 +211,11 @@
     return count_;
   }
 
+  @Override
+  public int getCountLessThanThetaLong(final long thetaLong) {
+    return count(hashTable_, thetaLong); // HashOperations.count (static import) applied to this sketch's hash table
+  }
+
   S[] getSummaryTable() {
     return summaryTable_;
   }
@@ -343,7 +349,7 @@
     if (count_ > 0) {
       sizeBytes += Integer.BYTES; // count
     }
-    sizeBytes += (Long.BYTES * count_) + summariesBytesLength;
+    sizeBytes += Long.BYTES * count_ + summariesBytesLength;
     final byte[] bytes = new byte[sizeBytes];
     int offset = 0;
     bytes[offset++] = PREAMBLE_LONGS;
@@ -395,7 +401,7 @@
   @SuppressWarnings("unchecked")
   void merge(final long hash, final S summary, final SummarySetOperations<S> summarySetOps) {
     empty_ = false;
-    if ((hash > 0) && (hash < thetaLong_)) {
+    if (hash > 0 && hash < thetaLong_) {
       final int index = findOrInsert(hash);
       if (index < 0) {
         insertSummary(~index, (S)summary.copy()); //did not find, so insert
@@ -479,7 +485,7 @@
     lgCurrentCapacity_ = Integer.numberOfTrailingZeros(newSize);
     count_ = 0;
     for (int i = 0; i < oldHashTable.length; i++) {
-      if ((oldSummaryTable[i] != null) && (oldHashTable[i] < thetaLong_)) {
+      if (oldSummaryTable[i] != null && oldHashTable[i] < thetaLong_) {
         insert(oldHashTable[i], oldSummaryTable[i]);
       }
     }
diff --git a/src/main/java/org/apache/datasketches/tuple/Sketch.java b/src/main/java/org/apache/datasketches/tuple/Sketch.java
index 02b7bf5..1ca7f9e 100644
--- a/src/main/java/org/apache/datasketches/tuple/Sketch.java
+++ b/src/main/java/org/apache/datasketches/tuple/Sketch.java
@@ -137,7 +137,7 @@
    * @return true if the sketch is in estimation mode.
    */
   public boolean isEstimationMode() {
-    return ((thetaLong_ < Long.MAX_VALUE) && !isEmpty());
+    return thetaLong_ < Long.MAX_VALUE && !isEmpty();
   }
 
   /**
@@ -146,6 +146,13 @@
   public abstract int getRetainedEntries();
 
   /**
+   * Gets the number of hash values less than the given theta expressed as a long.
+   * @param thetaLong the given theta as a long between zero and <i>Long.MAX_VALUE</i>.
+   * @return the number of hash values less than the given thetaLong.
+   */
+  public abstract int getCountLessThanThetaLong(final long thetaLong);
+
+  /**
    * Gets the value of theta as a double between zero and one
    * @return the value of theta as a double
    */
diff --git a/src/test/java/org/apache/datasketches/BoundsOnRatiosInTupleSketchedSetsTest.java b/src/test/java/org/apache/datasketches/BoundsOnRatiosInTupleSketchedSetsTest.java
new file mode 100644
index 0000000..15311f3
--- /dev/null
+++ b/src/test/java/org/apache/datasketches/BoundsOnRatiosInTupleSketchedSetsTest.java
@@ -0,0 +1,159 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches;
+
+import org.apache.datasketches.theta.UpdateSketch;
+import org.apache.datasketches.theta.UpdateSketchBuilder;
+import org.apache.datasketches.tuple.Sketch;
+import org.apache.datasketches.tuple.UpdatableSketch;
+import org.apache.datasketches.tuple.UpdatableSketchBuilder;
+import org.apache.datasketches.tuple.adouble.DoubleSummary;
+import org.apache.datasketches.tuple.adouble.DoubleSummaryFactory;
+import org.apache.datasketches.tuple.adouble.DoubleSummarySetOperations;
+import org.apache.datasketches.tuple.Intersection;
+import org.testng.annotations.Test;
+
+import static org.testng.Assert.assertEquals;
+import static org.testng.Assert.assertTrue;
+
+/**
+ * @author Lee Rhodes
+ * @author David Cromberge
+ */
+@SuppressWarnings("javadoc")
+public class BoundsOnRatiosInTupleSketchedSetsTest {
+
+  private final DoubleSummary.Mode umode = DoubleSummary.Mode.Sum;
+  private final DoubleSummarySetOperations dsso = new DoubleSummarySetOperations();
+  private final DoubleSummaryFactory factory = new DoubleSummaryFactory(umode);
+  private final UpdateSketchBuilder thetaBldr = UpdateSketch.builder();
+  private final UpdatableSketchBuilder<Double, DoubleSummary> tupleBldr = new UpdatableSketchBuilder<>(factory);
+  private final Double constSummary = 1.0;
+
+  @Test
+  public void checkNormalReturns1() { // tuple, tuple
+    final UpdatableSketch<Double, DoubleSummary> skA = tupleBldr.build(); //4K
+    final UpdatableSketch<Double, DoubleSummary> skC = tupleBldr.build();
+    final int uA = 10000;
+    final int uC = 100000;
+    for (int i = 0; i < uA; i++) { skA.update(i, constSummary); }
+    for (int i = 0; i < uC; i++) { skC.update(i + (uA / 2), constSummary); }
+    final Intersection<DoubleSummary> inter = new Intersection<>(dsso);
+    inter.intersect(skA);
+    inter.intersect(skC);
+    final Sketch<DoubleSummary> skB = inter.getResult();
+
+    double est = BoundsOnRatiosInTupleSketchedSets.getEstimateOfBoverA(skA, skB);
+    double lb = BoundsOnRatiosInTupleSketchedSets.getLowerBoundForBoverA(skA, skB);
+    double ub = BoundsOnRatiosInTupleSketchedSets.getUpperBoundForBoverA(skA, skB);
+    assertTrue(ub > est);
+    assertTrue(est > lb);
+    assertEquals(est, 0.5, .03);
+    println("ub : " + ub);
+    println("est: " + est);
+    println("lb : " + lb);
+    skA.reset(); //skA is now empty
+    est = BoundsOnRatiosInTupleSketchedSets.getEstimateOfBoverA(skA, skB);
+    lb = BoundsOnRatiosInTupleSketchedSets.getLowerBoundForBoverA(skA, skB);
+    ub = BoundsOnRatiosInTupleSketchedSets.getUpperBoundForBoverA(skA, skB);
+    println("ub : " + ub);
+    println("est: " + est);
+    println("lb : " + lb);
+    skC.reset(); //Now both are empty
+    est = BoundsOnRatiosInTupleSketchedSets.getEstimateOfBoverA(skA, skC);
+    lb = BoundsOnRatiosInTupleSketchedSets.getLowerBoundForBoverA(skA, skC);
+    ub = BoundsOnRatiosInTupleSketchedSets.getUpperBoundForBoverA(skA, skC);
+    println("ub : " + ub);
+    println("est: " + est);
+    println("lb : " + lb);
+  }
+
+  @Test
+  public void checkNormalReturns2() { // tuple, theta
+    final UpdatableSketch<Double, DoubleSummary> skA = tupleBldr.build(); //4K
+    final UpdateSketch skC = thetaBldr.build();
+    final int uA = 10000;
+    final int uC = 100000;
+    for (int i = 0; i < uA; i++) { skA.update(i, constSummary); }
+    for (int i = 0; i < uC; i++) { skC.update(i + (uA / 2)); }
+    final Intersection<DoubleSummary> inter = new Intersection<>(dsso);
+    inter.intersect(skA);
+    inter.intersect(skC, factory.newSummary());
+    final Sketch<DoubleSummary> skB = inter.getResult();
+
+    double est = BoundsOnRatiosInTupleSketchedSets.getEstimateOfBoverA(skA, skB);
+    double lb = BoundsOnRatiosInTupleSketchedSets.getLowerBoundForBoverA(skA, skB);
+    double ub = BoundsOnRatiosInTupleSketchedSets.getUpperBoundForBoverA(skA, skB);
+    assertTrue(ub > est);
+    assertTrue(est > lb);
+    assertEquals(est, 0.5, .03);
+    println("ub : " + ub);
+    println("est: " + est);
+    println("lb : " + lb);
+    skA.reset(); //skA is now empty
+    est = BoundsOnRatiosInTupleSketchedSets.getEstimateOfBoverA(skA, skB);
+    lb = BoundsOnRatiosInTupleSketchedSets.getLowerBoundForBoverA(skA, skB);
+    ub = BoundsOnRatiosInTupleSketchedSets.getUpperBoundForBoverA(skA, skB);
+    println("ub : " + ub);
+    println("est: " + est);
+    println("lb : " + lb);
+    skC.reset(); //Now both are empty
+    est = BoundsOnRatiosInTupleSketchedSets.getEstimateOfBoverA(skA, skC);
+    lb = BoundsOnRatiosInTupleSketchedSets.getLowerBoundForBoverA(skA, skC);
+    ub = BoundsOnRatiosInTupleSketchedSets.getUpperBoundForBoverA(skA, skC);
+    println("ub : " + ub);
+    println("est: " + est);
+    println("lb : " + lb);
+  }
+
+  @Test(expectedExceptions = SketchesArgumentException.class)
+  public void checkAbnormalReturns1() { // tuple, tuple
+    final UpdatableSketch<Double, DoubleSummary> skA = tupleBldr.build(); //4K
+    final UpdatableSketch<Double, DoubleSummary> skC = tupleBldr.build();
+    final int uA = 100000;
+    final int uC = 10000;
+    for (int i = 0; i < uA; i++) { skA.update(i, constSummary); }
+    for (int i = 0; i < uC; i++) { skC.update(i + (uA / 2), constSummary); }
+    BoundsOnRatiosInTupleSketchedSets.getEstimateOfBoverA(skA, skC);
+  }
+
+  @Test(expectedExceptions = SketchesArgumentException.class)
+  public void checkAbnormalReturns2() { // tuple, theta
+    final UpdatableSketch<Double, DoubleSummary> skA = tupleBldr.build(); //4K
+    final UpdateSketch skC = thetaBldr.build();
+    final int uA = 100000;
+    final int uC = 10000;
+    for (int i = 0; i < uA; i++) { skA.update(i, constSummary); }
+    for (int i = 0; i < uC; i++) { skC.update(i + (uA / 2)); }
+    BoundsOnRatiosInTupleSketchedSets.getEstimateOfBoverA(skA, skC);
+  }
+
+  @Test
+  public void printlnTest() {
+    println("PRINTING: " + this.getClass().getName());
+  }
+
+  /**
+   * @param s value to print
+   */
+  static void println(final String s) {
+    //System.out.println(s); //disable here
+  }
+}
diff --git a/src/test/java/org/apache/datasketches/tuple/JaccardSimilarityTest.java b/src/test/java/org/apache/datasketches/tuple/JaccardSimilarityTest.java
new file mode 100644
index 0000000..49a0fa1
--- /dev/null
+++ b/src/test/java/org/apache/datasketches/tuple/JaccardSimilarityTest.java
@@ -0,0 +1,458 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.tuple;
+
+import org.apache.datasketches.tuple.adouble.DoubleSummary;
+import org.apache.datasketches.tuple.adouble.DoubleSummaryFactory;
+import org.apache.datasketches.tuple.adouble.DoubleSummarySetOperations;
+import org.testng.annotations.Test;
+
+import org.apache.datasketches.theta.UpdateSketch;
+import org.apache.datasketches.theta.UpdateSketchBuilder;
+import static org.apache.datasketches.tuple.JaccardSimilarity.dissimilarityTest;
+import static org.apache.datasketches.tuple.JaccardSimilarity.exactlyEqual;
+import static org.apache.datasketches.tuple.JaccardSimilarity.jaccard;
+import static org.apache.datasketches.tuple.JaccardSimilarity.similarityTest;
+import static org.testng.Assert.assertFalse;
+import static org.testng.Assert.assertTrue;
+
+/**
+ * @author Lee Rhodes
+ * @author David Cromberge
+ */
+@SuppressWarnings("javadoc")
+public class JaccardSimilarityTest {
+  private final DoubleSummary.Mode umode = DoubleSummary.Mode.Sum;
+  private final DoubleSummarySetOperations dsso = new DoubleSummarySetOperations();
+  private final DoubleSummaryFactory factory = new DoubleSummaryFactory(umode);
+  private final UpdateSketchBuilder thetaBldr = UpdateSketch.builder();
+  private final UpdatableSketchBuilder<Double, DoubleSummary> tupleBldr = new UpdatableSketchBuilder<>(factory);
+  private final Double constSummary = 1.0;
+
+  @Test
+  public void checkNullsEmpties1() { // tuple, tuple
+    int minK = 1 << 12;
+    double threshold = 0.95;
+    println("Check nulls & empties, minK: " + minK + "\t Th: " + threshold);
+    //check both null
+    double[] jResults = jaccard(null, null, dsso);
+    boolean state = jResults[1] > threshold;
+    println("null \t null:\t" + state + "\t" + jaccardString(jResults));
+    assertFalse(state);
+
+    state = exactlyEqual(null, null, dsso);
+    assertFalse(state);
+
+    final UpdatableSketch<Double, DoubleSummary> measured = tupleBldr.setNominalEntries(minK).build();
+    final UpdatableSketch<Double, DoubleSummary> expected = tupleBldr.setNominalEntries(minK).build();
+
+    //check both empty
+    jResults = jaccard(measured, expected, dsso);
+    state = jResults[1] > threshold;
+    println("empty\tempty:\t" + state + "\t" + jaccardString(jResults));
+    assertTrue(state);
+
+    state = exactlyEqual(measured, expected, dsso);
+    assertTrue(state);
+
+    state = exactlyEqual(measured, measured, dsso);
+    assertTrue(state);
+
+    //adjust one
+    expected.update(1, constSummary);
+    jResults = jaccard(measured, expected, dsso);
+    state = jResults[1] > threshold;
+    println("empty\t    1:\t" + state + "\t" + jaccardString(jResults));
+    assertFalse(state);
+
+    state = exactlyEqual(measured, expected, dsso);
+    assertFalse(state);
+
+    println("");
+  }
+
+  @Test
+  public void checkNullsEmpties2() { // tuple, theta
+    int minK = 1 << 12;
+    double threshold = 0.95;
+    println("Check nulls & empties, minK: " + minK + "\t Th: " + threshold);
+    //check both null
+    double[] jResults = jaccard(null, null, factory.newSummary(), dsso);
+    boolean state = jResults[1] > threshold;
+    println("null \t null:\t" + state + "\t" + jaccardString(jResults));
+    assertFalse(state);
+
+    state = exactlyEqual(null, null, factory.newSummary(), dsso);
+    assertFalse(state);
+
+    final UpdatableSketch<Double, DoubleSummary> measured = tupleBldr.setNominalEntries(minK).build();
+    final UpdateSketch expected = thetaBldr.setNominalEntries(minK).build();
+
+    //check both empty
+    jResults = jaccard(measured, expected, factory.newSummary(), dsso);
+    state = jResults[1] > threshold;
+    println("empty\tempty:\t" + state + "\t" + jaccardString(jResults));
+    assertTrue(state);
+
+    state = exactlyEqual(measured, expected, factory.newSummary(), dsso);
+    assertTrue(state);
+
+    state = exactlyEqual(measured, measured, dsso);
+    assertTrue(state);
+
+    //adjust one
+    expected.update(1);
+    jResults = jaccard(measured, expected, factory.newSummary(), dsso);
+    state = jResults[1] > threshold;
+    println("empty\t    1:\t" + state + "\t" + jaccardString(jResults));
+    assertFalse(state);
+
+    state = exactlyEqual(measured, expected, factory.newSummary(), dsso);
+    assertFalse(state);
+
+    println("");
+  }
+
+  @Test
+  public void checkExactMode1() { // tuple, tuple
+    int k = 1 << 12;
+    int u = k;
+    double threshold = 0.9999;
+    println("Exact Mode, minK: " + k + "\t Th: " + threshold);
+
+    final UpdatableSketch<Double, DoubleSummary> measured = tupleBldr.setNominalEntries(k).build();
+    final UpdatableSketch<Double, DoubleSummary> expected = tupleBldr.setNominalEntries(k).build();
+
+    for (int i = 0; i < (u-1); i++) { //one short
+      measured.update(i, constSummary);
+      expected.update(i, constSummary);
+    }
+
+    double[] jResults = jaccard(measured, expected, dsso);
+    boolean state = jResults[1] > threshold;
+    println(state + "\t" + jaccardString(jResults));
+    assertTrue(state);
+
+    state = exactlyEqual(measured, expected, dsso);
+    assertTrue(state);
+
+    measured.update(u-1, constSummary); //now exactly k entries
+    expected.update(u, constSummary);   //now exactly k entries but differs by one
+    jResults = jaccard(measured, expected, dsso);
+    state = jResults[1] > threshold;
+    println(state + "\t" + jaccardString(jResults));
+    assertFalse(state);
+
+    state = exactlyEqual(measured, expected, dsso);
+    assertFalse(state);
+
+    println("");
+  }
+
+  @Test
+  public void checkExactMode2() { // tuple, theta
+    int k = 1 << 12;
+    int u = k;
+    double threshold = 0.9999;
+    println("Exact Mode, minK: " + k + "\t Th: " + threshold);
+
+    final UpdatableSketch<Double, DoubleSummary> measured = tupleBldr.setNominalEntries(k).build();
+    final UpdateSketch expected = thetaBldr.setNominalEntries(k).build();
+
+    for (int i = 0; i < (u-1); i++) { //one short
+      measured.update(i, constSummary);
+      expected.update(i);
+    }
+
+    double[] jResults = jaccard(measured, expected, factory.newSummary(), dsso);
+    boolean state = jResults[1] > threshold;
+    println(state + "\t" + jaccardString(jResults));
+    assertTrue(state);
+
+    state = exactlyEqual(measured, expected, factory.newSummary(), dsso);
+    assertTrue(state);
+
+    measured.update(u-1, constSummary); //now exactly k entries
+    expected.update(u);   //now exactly k entries but differs by one
+    jResults = jaccard(measured, expected, factory.newSummary(), dsso);
+    state = jResults[1] > threshold;
+    println(state + "\t" + jaccardString(jResults));
+    assertFalse(state);
+
+    state = exactlyEqual(measured, expected, factory.newSummary(), dsso);
+    assertFalse(state);
+
+    println("");
+  }
+
+  @Test
+  public void checkEstMode1() { // tuple, tuple
+    int k = 1 << 12;
+    int u = 1 << 20;
+    double threshold = 0.9999;
+    println("Estimation Mode, minK: " + k + "\t Th: " + threshold);
+
+    final UpdatableSketch<Double, DoubleSummary> measured = tupleBldr.setNominalEntries(k).build();
+    final UpdatableSketch<Double, DoubleSummary> expected = tupleBldr.setNominalEntries(k).build();
+
+    for (int i = 0; i < u; i++) {
+      measured.update(i, constSummary);
+      expected.update(i, constSummary);
+    }
+
+    double[] jResults = jaccard(measured, expected, dsso);
+    boolean state = jResults[1] > threshold;
+    println(state + "\t" + jaccardString(jResults));
+    assertTrue(state);
+
+    state = exactlyEqual(measured, expected, dsso);
+    assertTrue(state);
+
+    for (int i = u; i < (u + 50); i++) { //empirically determined
+      measured.update(i, constSummary);
+    }
+
+    jResults = jaccard(measured, expected, dsso);
+    state = jResults[1] >= threshold;
+    println(state + "\t" + jaccardString(jResults));
+    assertFalse(state);
+
+    state = exactlyEqual(measured, expected, dsso);
+    assertFalse(state);
+
+    println("");
+  }
+
+  @Test
+  public void checkEstMode2() { // tuple, theta
+    int k = 1 << 12;
+    int u = 1 << 20;
+    double threshold = 0.9999;
+    println("Estimation Mode, minK: " + k + "\t Th: " + threshold);
+
+    final UpdatableSketch<Double, DoubleSummary> measured = tupleBldr.setNominalEntries(k).build();
+    final UpdateSketch expected = thetaBldr.setNominalEntries(k).build();
+
+    for (int i = 0; i < u; i++) {
+      measured.update(i, constSummary);
+      expected.update(i);
+    }
+
+    double[] jResults = jaccard(measured, expected, factory.newSummary(), dsso);
+    boolean state = jResults[1] > threshold;
+    println(state + "\t" + jaccardString(jResults));
+    assertTrue(state);
+
+    state = exactlyEqual(measured, expected, factory.newSummary(), dsso);
+    assertTrue(state);
+
+    for (int i = u; i < (u + 50); i++) { //empirically determined
+      measured.update(i, constSummary);
+    }
+
+    jResults = jaccard(measured, expected, factory.newSummary(), dsso);
+    state = jResults[1] >= threshold;
+    println(state + "\t" + jaccardString(jResults));
+    assertFalse(state);
+
+    state = exactlyEqual(measured, expected, factory.newSummary(), dsso);
+    assertFalse(state);
+
+    println("");
+  }
+
+  /**
+   * Enable printing on this test and you will see that the distribution is pretty tight,
+   * about +/- 0.7%, which is pretty good since the accuracy of the underlying sketch is about
+   * +/- 1.56%.
+   */
+  @Test
+  public void checkSimilarity1() { // tuple, tuple
+    int minK = 1 << 12;
+    int u1 = 1 << 20;
+    int u2 = (int) (u1 * 0.95);
+    double threshold = 0.943;
+    println("Estimation Mode, minK: " + minK + "\t Th: " + threshold);
+
+    final UpdatableSketch<Double, DoubleSummary> measured = tupleBldr.setNominalEntries(minK).build();
+    final UpdatableSketch<Double, DoubleSummary> expected = tupleBldr.setNominalEntries(minK).build();
+
+    for (int i = 0; i < u1; i++) {
+      expected.update(i, constSummary);
+    }
+
+    for (int i = 0; i < u2; i++) {
+      measured.update(i, constSummary);
+    }
+
+    double[] jResults = jaccard(measured, expected, dsso);
+    boolean state = similarityTest(measured, expected, dsso, threshold);
+    println(state + "\t" + jaccardString(jResults));
+    assertTrue(state);
+    //check identity case
+    state = similarityTest(measured, measured, dsso, threshold);
+    assertTrue(state);
+  }
+
+  /**
+   * Enable printing on this test and you will see that the distribution is pretty tight,
+   * about +/- 0.7%, which is pretty good since the accuracy of the underlying sketch is about
+   * +/- 1.56%.
+   */
+  @Test
+  public void checkSimilarity2() { // tuple, theta
+    int minK = 1 << 12;
+    int u1 = 1 << 20;
+    int u2 = (int) (u1 * 0.95);
+    double threshold = 0.943;
+    println("Estimation Mode, minK: " + minK + "\t Th: " + threshold);
+
+    final UpdatableSketch<Double, DoubleSummary> measured = tupleBldr.setNominalEntries(minK).build();
+    final UpdateSketch expected = thetaBldr.setNominalEntries(minK).build();
+
+    for (int i = 0; i < u1; i++) {
+      expected.update(i);
+    }
+
+    for (int i = 0; i < u2; i++) {
+      measured.update(i, constSummary);
+    }
+
+    double[] jResults = jaccard(measured, expected, factory.newSummary(), dsso);
+    boolean state = similarityTest(measured, expected, factory.newSummary(), dsso, threshold);
+    println(state + "\t" + jaccardString(jResults));
+    assertTrue(state);
+    //check identity case
+    state = similarityTest(measured, measured, dsso, threshold);
+    assertTrue(state);
+  }
+
+  /**
+   * Enable printing on this test and you will see that the distribution is much looser,
+   * about +/- 14%.  This is due to the fact that intersections lose accuracy as the ratio of
+   * intersection to the union becomes a small number.
+   */
+  @Test
+  public void checkDissimilarity1() { // tuple, tuple
+    int minK = 1 << 12;
+    int u1 = 1 << 20;
+    int u2 = (int) (u1 * 0.05); // measured gets only the first 5% of expected's keys
+    double threshold = 0.061; // just above the true Jaccard index of about 0.05 (u2 / u1)
+    println("Estimation Mode, minK: " + minK + "\t Th: " + threshold);
+
+    final UpdatableSketch<Double, DoubleSummary> measured = tupleBldr.setNominalEntries(minK).build();
+    final UpdatableSketch<Double, DoubleSummary> expected = tupleBldr.setNominalEntries(minK).build();
+
+    for (int i = 0; i < u1; i++) {
+      expected.update(i, constSummary);
+    }
+
+    for (int i = 0; i < u2; i++) {
+      measured.update(i, constSummary);
+    }
+
+    double[] jResults = jaccard(measured, expected, dsso);
+    boolean state = dissimilarityTest(measured, expected, dsso, threshold);
+    println(state + "\t" + jaccardString(jResults));
+    assertTrue(state);
+  }
+
+  /**
+   * Enable printing on this test and you will see that the distribution is much looser,
+   * about +/- 14%.  This is due to the fact that intersections lose accuracy as the ratio of
+   * intersection to the union becomes a small number.
+   */
+  @Test
+  public void checkDissimilarity2() { // tuple, theta
+    int minK = 1 << 12;
+    int u1 = 1 << 20;
+    int u2 = (int) (u1 * 0.05); // measured gets only the first 5% of expected's keys
+    double threshold = 0.061; // just above the true Jaccard index of about 0.05 (u2 / u1)
+    println("Estimation Mode, minK: " + minK + "\t Th: " + threshold);
+
+    final UpdatableSketch<Double, DoubleSummary> measured = tupleBldr.setNominalEntries(minK).build();
+    final UpdateSketch expected = thetaBldr.setNominalEntries(minK).build();
+
+    for (int i = 0; i < u1; i++) {
+      expected.update(i);
+    }
+
+    for (int i = 0; i < u2; i++) {
+      measured.update(i, constSummary);
+    }
+
+    double[] jResults = jaccard(measured, expected, factory.newSummary(), dsso);
+    boolean state = dissimilarityTest(measured, expected, factory.newSummary(), dsso, threshold);
+    println(state + "\t" + jaccardString(jResults));
+    assertTrue(state);
+  }
+
+  private static String jaccardString(double[] jResults) {
+    double lb = jResults[0];
+    double est = jResults[1];
+    double ub = jResults[2];
+    return lb + "\t" + est + "\t" + ub + "\t" + ((lb/est) - 1.0) + "\t" + ((ub/est) - 1.0);
+  }
+
+  @Test
+  public void checkMinK1() { // tuple, tuple
+    final UpdatableSketch<Double, DoubleSummary> skA = tupleBldr.build(); //4096
+    final UpdatableSketch<Double, DoubleSummary> skB = tupleBldr.build(); //4096
+    skA.update(1, constSummary);
+    skB.update(1, constSummary);
+    double[] result = jaccard(skA, skB, dsso);
+    println(result[0] + ", " + result[1] + ", " + result[2]);
+    for (int i = 1; i < 4096; i++) {
+      skA.update(i, constSummary);
+      skB.update(i, constSummary);
+    }
+    result = jaccard(skA, skB, dsso);
+    println(result[0] + ", " + result[1] + ", " + result[2]);
+  }
+
+  @Test
+  public void checkMinK2() { // tuple, theta
+    final UpdatableSketch<Double, DoubleSummary> skA = tupleBldr.build(); //4096
+    final UpdateSketch skB = UpdateSketch.builder().build(); //4096
+    skA.update(1, constSummary);
+    skB.update(1);
+    double[] result = jaccard(skA, skB, factory.newSummary(), dsso);
+    println(result[0] + ", " + result[1] + ", " + result[2]);
+    for (int i = 1; i < 4096; i++) {
+      skA.update(i, constSummary);
+      skB.update(i);
+    }
+    result = jaccard(skA, skB, factory.newSummary(), dsso);
+    println(result[0] + ", " + result[1] + ", " + result[2]);
+  }
+
+  @Test
+  public void printlnTest() {
+    println("PRINTING: "+this.getClass().getName());
+  }
+
+  /**
+   * @param s value to print
+   */
+  static void println(String s) {
+    //System.out.println(s); //disable here
+  }
+
+}