| /* |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, |
| * software distributed under the License is distributed on an |
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| * KIND, either express or implied. See the License for the |
| * specific language governing permissions and limitations |
| * under the License. |
| */ |
| |
| #include <catch2/catch.hpp> |
| |
| #include <theta_union.hpp> |
| |
| #include <stdexcept> |
| |
| namespace datasketches { |
| |
| TEST_CASE("theta union: empty", "[theta_union]") { |
| update_theta_sketch sketch1 = update_theta_sketch::builder().build(); |
| theta_union u = theta_union::builder().build(); |
| compact_theta_sketch sketch2 = u.get_result(); |
| REQUIRE(sketch2.get_num_retained() == 0); |
| REQUIRE(sketch2.is_empty()); |
| REQUIRE_FALSE(sketch2.is_estimation_mode()); |
| |
| u.update(sketch1); |
| sketch2 = u.get_result(); |
| REQUIRE(sketch2.get_num_retained() == 0); |
| REQUIRE(sketch2.is_empty()); |
| REQUIRE_FALSE(sketch2.is_estimation_mode()); |
| } |
| |
| TEST_CASE("theta union: non empty no retained keys", "[theta_union]") { |
| update_theta_sketch update_sketch = update_theta_sketch::builder().set_p(0.001f).build(); |
| update_sketch.update(1); |
| theta_union u = theta_union::builder().build(); |
| u.update(update_sketch); |
| compact_theta_sketch sketch = u.get_result(); |
| REQUIRE(sketch.get_num_retained() == 0); |
| REQUIRE_FALSE(sketch.is_empty()); |
| REQUIRE(sketch.is_estimation_mode()); |
| REQUIRE(sketch.get_theta() == Approx(0.001).margin(1e-10)); |
| } |
| |
| TEST_CASE("theta union: exact mode half overlap", "[theta_union]") { |
| auto sketch1 = update_theta_sketch::builder().build(); |
| int value = 0; |
| for (int i = 0; i < 1000; i++) sketch1.update(value++); |
| |
| auto sketch2 = update_theta_sketch::builder().build(); |
| value = 500; |
| for (int i = 0; i < 1000; i++) sketch2.update(value++); |
| |
| auto u = theta_union::builder().build(); |
| u.update(sketch1); |
| u.update(sketch2); |
| auto sketch3 = u.get_result(); |
| REQUIRE_FALSE(sketch3.is_empty()); |
| REQUIRE_FALSE(sketch3.is_estimation_mode()); |
| REQUIRE(sketch3.get_estimate() == 1500.0); |
| |
| u.reset(); |
| sketch3 = u.get_result(); |
| REQUIRE(sketch3.get_num_retained() == 0); |
| REQUIRE(sketch3.is_empty()); |
| REQUIRE_FALSE(sketch3.is_estimation_mode()); |
| } |
| |
| TEST_CASE("theta union: exact mode half overlap wrapped compact", "[theta_union]") { |
| auto sketch1 = update_theta_sketch::builder().build(); |
| int value = 0; |
| for (int i = 0; i < 1000; i++) sketch1.update(value++); |
| auto bytes1 = sketch1.compact().serialize(); |
| |
| auto sketch2 = update_theta_sketch::builder().build(); |
| value = 500; |
| for (int i = 0; i < 1000; i++) sketch2.update(value++); |
| auto bytes2 = sketch2.compact().serialize(); |
| |
| auto u = theta_union::builder().build(); |
| u.update(wrapped_compact_theta_sketch::wrap(bytes1.data(), bytes1.size())); |
| u.update(wrapped_compact_theta_sketch::wrap(bytes2.data(), bytes2.size())); |
| compact_theta_sketch sketch3 = u.get_result(); |
| REQUIRE_FALSE(sketch3.is_empty()); |
| REQUIRE_FALSE(sketch3.is_estimation_mode()); |
| REQUIRE(sketch3.get_estimate() == 1500.0); |
| } |
| |
| TEST_CASE("theta union: estimation mode half overlap", "[theta_union]") { |
| auto sketch1 = update_theta_sketch::builder().build(); |
| int value = 0; |
| for (int i = 0; i < 10000; i++) sketch1.update(value++); |
| |
| auto sketch2 = update_theta_sketch::builder().build(); |
| value = 5000; |
| for (int i = 0; i < 10000; i++) sketch2.update(value++); |
| |
| auto u = theta_union::builder().build(); |
| u.update(sketch1); |
| u.update(sketch2); |
| auto sketch3 = u.get_result(); |
| REQUIRE_FALSE(sketch3.is_empty()); |
| REQUIRE(sketch3.is_estimation_mode()); |
| REQUIRE(sketch3.get_estimate() == Approx(15000).margin(15000 * 0.01)); |
| //std::cerr << sketch3.to_string(true); |
| |
| u.reset(); |
| sketch3 = u.get_result(); |
| REQUIRE(sketch3.get_num_retained() == 0); |
| REQUIRE(sketch3.is_empty()); |
| REQUIRE_FALSE(sketch3.is_estimation_mode()); |
| } |
| |
| TEST_CASE("theta union: seed mismatch", "[theta_union]") { |
| update_theta_sketch sketch = update_theta_sketch::builder().build(); |
| sketch.update(1); // non-empty should not be ignored |
| theta_union u = theta_union::builder().set_seed(123).build(); |
| REQUIRE_THROWS_AS(u.update(sketch), std::invalid_argument); |
| } |
| |
| TEST_CASE("theta union: larger K", "[theta_union]") { |
| auto update_sketch1 = datasketches::update_theta_sketch::builder().set_lg_k(14).build(); |
| for(int i = 0; i < 16384; ++i) update_sketch1.update(i); |
| |
| auto update_sketch2 = datasketches::update_theta_sketch::builder().set_lg_k(14).build(); |
| for(int i = 0; i < 26384; ++i) update_sketch2.update(i); |
| |
| auto update_sketch3 = datasketches::update_theta_sketch::builder().set_lg_k(14).build(); |
| for(int i = 0; i < 86384; ++i) update_sketch3.update(i); |
| |
| auto union1 = datasketches::theta_union::builder().set_lg_k(16).build(); |
| union1.update(update_sketch2); |
| union1.update(update_sketch1); |
| union1.update(update_sketch3); |
| auto result1 = union1.get_result(); |
| REQUIRE(result1.get_estimate() == update_sketch3.get_estimate()); |
| |
| auto union2 = datasketches::theta_union::builder().set_lg_k(16).build(); |
| union2.update(update_sketch1); |
| union2.update(update_sketch3); |
| union2.update(update_sketch2); |
| auto result2 = union2.get_result(); |
| REQUIRE(result2.get_estimate() == update_sketch3.get_estimate()); |
| } |
| |
| } /* namespace datasketches */ |