import unittest
from datasketches import cpc_sketch, cpc_union
class CpcTest(unittest.TestCase):
    """Example-style test for the CPC sketch and CPC union bindings."""

    def test_cpc_example(self):
        """Build two overlapping sketches, union them, and round-trip the result.

        The two sketches each receive ``n`` distinct integers, shifted so
        that one quarter of the values are shared; the union is therefore
        expected to represent ``7 * n / 4`` distinct values.
        """
        k = 12       # 2^k = 4096 rows in the table
        n = 1 << 18  # ~256k unique values

        # create a couple sketches and inject some values
        # we'll have 1/4 of the values overlap
        cpc = cpc_sketch(k)
        cpc2 = cpc_sketch(k)
        offset = int(3 * n / 4)  # it's a float w/o cast
        # because we hash on the bits, not an abstract numeric value,
        # cpc.update(1) and cpc.update(1.0) give different results.
        for i in range(0, n):
            # both sketches must be fed: cpc gets [0, n) and cpc2 gets
            # [offset, offset + n), which share the last quarter of cpc's range
            cpc.update(i)
            cpc2.update(i + offset)

        # although we provide get_composite_estimate() and get_estimate(),
        # the latter will always give the best available estimate. we
        # recommend using get_estimate().
        # we can check that the upper and lower bounds bracket the
        # estimate, without needing to know the exact value.
        self.assertLessEqual(cpc.get_lower_bound(1), cpc.get_estimate())
        self.assertGreaterEqual(cpc.get_upper_bound(1), cpc.get_estimate())

        # unioning uses a separate class, but we need to get_result()
        # to query the unioned sketches
        union = cpc_union(k)
        # the union starts out empty; each input sketch has to be merged
        # in explicitly before asking for the result
        union.update(cpc)
        union.update(cpc2)
        result = union.get_result()

        # since our process here (including post-union CPC) is
        # deterministic, we have checked and know the exact
        # answer is within one standard deviation of the estimate
        self.assertLessEqual(result.get_lower_bound(1), 7 * n / 4)
        self.assertGreaterEqual(result.get_upper_bound(1), 7 * n / 4)

        # serialize for storage and reconstruct
        sk_bytes = result.serialize()
        new_cpc = cpc_sketch.deserialize(sk_bytes)
        # the reconstructed sketch should carry the unioned data
        self.assertFalse(new_cpc.is_empty())
if __name__ == '__main__':