theta intersection and a-not-b
diff --git a/tuple/CMakeLists.txt b/tuple/CMakeLists.txt
index fce73ab..411c712 100644
--- a/tuple/CMakeLists.txt
+++ b/tuple/CMakeLists.txt
@@ -37,13 +37,15 @@
list(APPEND tuple_HEADERS "include/tuple_union.hpp;include/tuple_union_impl.hpp")
list(APPEND tuple_HEADERS "include/tuple_intersection.hpp;include/tuple_intersection_impl.hpp")
list(APPEND tuple_HEADERS "include/tuple_a_not_b.hpp;include/tuple_a_not_b_impl.hpp")
+list(APPEND tuple_HEADERS "include/array_of_doubles_sketch.hpp;include/array_of_doubles_sketch_impl.hpp")
list(APPEND tuple_HEADERS "include/theta_update_sketch_base.hpp;include/theta_update_sketch_base_impl.hpp")
list(APPEND tuple_HEADERS "include/theta_union_base.hpp;include/theta_union_base_impl.hpp")
list(APPEND tuple_HEADERS "include/theta_intersection_base.hpp;include/theta_intersection_base_impl.hpp")
list(APPEND tuple_HEADERS "include/theta_set_difference_base.hpp;include/theta_set_difference_base_impl.hpp")
list(APPEND tuple_HEADERS "include/theta_sketch_experimental.hpp;include/theta_sketch_experimental_impl.hpp")
list(APPEND tuple_HEADERS "include/theta_union_experimental.hpp;include/theta_union_experimental_impl.hpp")
-list(APPEND tuple_HEADERS "include/array_of_doubles_sketch.hpp;include/array_of_doubles_sketch_impl.hpp")
+list(APPEND tuple_HEADERS "include/theta_intersection_experimental.hpp;include/theta_intersection_experimental_impl.hpp")
+list(APPEND tuple_HEADERS "include/theta_a_not_b_experimental.hpp;include/theta_a_not_b_experimental_impl.hpp")
install(TARGETS tuple
EXPORT ${PROJECT_NAME}
@@ -62,6 +64,8 @@
${CMAKE_CURRENT_SOURCE_DIR}/include/tuple_intersection_impl.hpp
${CMAKE_CURRENT_SOURCE_DIR}/include/tuple_a_not_b.hpp
${CMAKE_CURRENT_SOURCE_DIR}/include/tuple_a_not_b_impl.hpp
+ ${CMAKE_CURRENT_SOURCE_DIR}/include/array_of_doubles_sketch.hpp
+ ${CMAKE_CURRENT_SOURCE_DIR}/include/array_of_doubles_sketch_impl.hpp
${CMAKE_CURRENT_SOURCE_DIR}/include/theta_update_sketch_base.hpp
${CMAKE_CURRENT_SOURCE_DIR}/include/theta_update_sketch_base_impl.hpp
${CMAKE_CURRENT_SOURCE_DIR}/include/theta_union_base.hpp
@@ -74,6 +78,8 @@
${CMAKE_CURRENT_SOURCE_DIR}/include/theta_sketch_experimental_impl.hpp
${CMAKE_CURRENT_SOURCE_DIR}/include/theta_union_experimental.hpp
${CMAKE_CURRENT_SOURCE_DIR}/include/theta_union_experimental_impl.hpp
- ${CMAKE_CURRENT_SOURCE_DIR}/include/array_of_doubles_sketch.hpp
- ${CMAKE_CURRENT_SOURCE_DIR}/include/array_of_doubles_sketch_impl.hpp
+ ${CMAKE_CURRENT_SOURCE_DIR}/include/theta_intersection_experimental.hpp
+ ${CMAKE_CURRENT_SOURCE_DIR}/include/theta_intersection_experimental_impl.hpp
+ ${CMAKE_CURRENT_SOURCE_DIR}/include/theta_a_not_b_experimental.hpp
+ ${CMAKE_CURRENT_SOURCE_DIR}/include/theta_a_not_b_experimental_impl.hpp
)
diff --git a/tuple/include/theta_a_not_b_experimental.hpp b/tuple/include/theta_a_not_b_experimental.hpp
new file mode 100644
index 0000000..ed29ce5
--- /dev/null
+++ b/tuple/include/theta_a_not_b_experimental.hpp
@@ -0,0 +1,54 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#ifndef THETA_A_NOT_B_EXPERIMENTAL_HPP_
+#define THETA_A_NOT_B_EXPERIMENTAL_HPP_
+
+#include "theta_sketch_experimental.hpp"
+#include "theta_set_difference_base.hpp"
+
+namespace datasketches {
+
+template<typename Allocator = std::allocator<uint64_t>>
+class theta_a_not_b_experimental {
+public:
+ using Entry = uint64_t;
+ using ExtractKey = trivial_extract_key;
+ using Sketch = theta_sketch_experimental<Allocator>;
+ using CompactSketch = compact_theta_sketch_experimental<Allocator>;
+ using State = theta_set_difference_base<Entry, ExtractKey, Sketch, CompactSketch, Allocator>;
+
+ explicit theta_a_not_b_experimental(uint64_t seed = DEFAULT_SEED, const Allocator& allocator = Allocator());
+
+ /**
+ * Computes the a-not-b set operation given two sketches.
+ * @return the result of a-not-b
+ */
+ template<typename FwdSketch>
+ CompactSketch compute(FwdSketch&& a, const Sketch& b, bool ordered = true) const;
+
+private:
+ State state_;
+};
+
+} /* namespace datasketches */
+
+#include "theta_a_not_b_experimental_impl.hpp"
+
+#endif
diff --git a/tuple/include/theta_a_not_b_experimental_impl.hpp b/tuple/include/theta_a_not_b_experimental_impl.hpp
new file mode 100644
index 0000000..57b3f70
--- /dev/null
+++ b/tuple/include/theta_a_not_b_experimental_impl.hpp
@@ -0,0 +1,33 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+namespace datasketches {
+
+template<typename A>
+theta_a_not_b_experimental<A>::theta_a_not_b_experimental(uint64_t seed, const A& allocator):
+state_(seed, allocator)
+{}
+
+template<typename A>
+template<typename SS>
+auto theta_a_not_b_experimental<A>::compute(SS&& a, const Sketch& b, bool ordered) const -> CompactSketch {
+ return state_.compute(std::forward<SS>(a), b, ordered);
+}
+
+} /* namespace datasketches */
diff --git a/tuple/include/theta_intersection_experimental.hpp b/tuple/include/theta_intersection_experimental.hpp
new file mode 100644
index 0000000..293b2e9
--- /dev/null
+++ b/tuple/include/theta_intersection_experimental.hpp
@@ -0,0 +1,78 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#ifndef THETA_INTERSECTION_EXPERIMENTAL_HPP_
+#define THETA_INTERSECTION_EXPERIMENTAL_HPP_
+
+#include "theta_sketch_experimental.hpp"
+#include "theta_intersection_base.hpp"
+
+namespace datasketches {
+
+template<typename Allocator = std::allocator<uint64_t>>
+class theta_intersection_experimental {
+public:
+ using Entry = uint64_t;
+ using ExtractKey = trivial_extract_key;
+ using Sketch = theta_sketch_experimental<Allocator>;
+ using CompactSketch = compact_theta_sketch_experimental<Allocator>;
+
+ struct pass_through_policy {
+ uint64_t operator()(uint64_t internal_entry, uint64_t incoming_entry) const {
+ unused(incoming_entry);
+ return internal_entry;
+ }
+ };
+ using State = theta_intersection_base<Entry, ExtractKey, pass_through_policy, Sketch, CompactSketch, Allocator>;
+
+ explicit theta_intersection_experimental(uint64_t seed = DEFAULT_SEED, const Allocator& allocator = Allocator());
+
+ /**
+ * Updates the intersection with a given sketch.
+ * The intersection can be viewed as starting from the "universe" set, and every update
+ * can reduce the current set to leave the overlapping subset only.
+ * @param sketch represents input set for the intersection
+ */
+ template<typename FwdSketch>
+ void update(FwdSketch&& sketch);
+
+ /**
+ * Produces a copy of the current state of the intersection.
+ * If update() was not called, the state is the infinite "universe",
+ * which is considered an undefined state, and throws an exception.
+ * @param ordered optional flag to specify if ordered sketch should be produced
+ * @return the result of the intersection
+ */
+ CompactSketch get_result(bool ordered = true) const;
+
+ /**
+ * Returns true if the state of the intersection is defined (not infinite "universe").
+ * @return true if the state is valid
+ */
+ bool has_result() const;
+
+private:
+ State state_;
+};
+
+} /* namespace datasketches */
+
+#include "theta_intersection_experimental_impl.hpp"
+
+#endif
diff --git a/tuple/include/theta_intersection_experimental_impl.hpp b/tuple/include/theta_intersection_experimental_impl.hpp
new file mode 100644
index 0000000..e8bcfbb
--- /dev/null
+++ b/tuple/include/theta_intersection_experimental_impl.hpp
@@ -0,0 +1,43 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+namespace datasketches {
+
+template<typename A>
+theta_intersection_experimental<A>::theta_intersection_experimental(uint64_t seed, const A& allocator):
+state_(seed, pass_through_policy(), allocator)
+{}
+
+template<typename A>
+template<typename SS>
+void theta_intersection_experimental<A>::update(SS&& sketch) {
+ state_.update(std::forward<SS>(sketch));
+}
+
+template<typename A>
+auto theta_intersection_experimental<A>::get_result(bool ordered) const -> CompactSketch {
+ return state_.get_result(ordered);
+}
+
+template<typename A>
+bool theta_intersection_experimental<A>::has_result() const {
+ return state_.has_result();
+}
+
+} /* namespace datasketches */
diff --git a/tuple/include/theta_union_experimental.hpp b/tuple/include/theta_union_experimental.hpp
index 8f93248..838d8bc 100644
--- a/tuple/include/theta_union_experimental.hpp
+++ b/tuple/include/theta_union_experimental.hpp
@@ -29,13 +29,6 @@
// experimental theta union derived from the same base as tuple union
-struct pass_through_policy {
- uint64_t operator()(uint64_t internal_entry, uint64_t incoming_entry) const {
- unused(incoming_entry);
- return internal_entry;
- }
-};
-
template<typename Allocator = std::allocator<uint64_t>>
class theta_union_experimental {
public:
@@ -45,6 +38,12 @@
using CompactSketch = compact_theta_sketch_experimental<Allocator>;
using resize_factor = theta_constants::resize_factor;
+ struct pass_through_policy {
+ uint64_t operator()(uint64_t internal_entry, uint64_t incoming_entry) const {
+ unused(incoming_entry);
+ return internal_entry;
+ }
+ };
using State = theta_union_base<Entry, ExtractKey, pass_through_policy, Sketch, CompactSketch, Allocator>;
// No constructor here. Use builder instead.
diff --git a/tuple/include/tuple_a_not_b.hpp b/tuple/include/tuple_a_not_b.hpp
index 923e2e1..024406b 100644
--- a/tuple/include/tuple_a_not_b.hpp
+++ b/tuple/include/tuple_a_not_b.hpp
@@ -38,7 +38,7 @@
using AllocEntry = typename std::allocator_traits<Allocator>::template rebind_alloc<Entry>;
using State = theta_set_difference_base<Entry, ExtractKey, Sketch, CompactSketch, AllocEntry>;
- explicit tuple_a_not_b(uint64_t seed = DEFAULT_SEED);
+ explicit tuple_a_not_b(uint64_t seed = DEFAULT_SEED, const Allocator& allocator = Allocator());
/**
* Computes the a-not-b set operation given two sketches.
diff --git a/tuple/include/tuple_a_not_b_impl.hpp b/tuple/include/tuple_a_not_b_impl.hpp
index 4ba2e41..b7873c0 100644
--- a/tuple/include/tuple_a_not_b_impl.hpp
+++ b/tuple/include/tuple_a_not_b_impl.hpp
@@ -20,8 +20,8 @@
namespace datasketches {
template<typename S, typename A>
-tuple_a_not_b<S, A>::tuple_a_not_b(uint64_t seed):
-state_(seed)
+tuple_a_not_b<S, A>::tuple_a_not_b(uint64_t seed, const A& allocator):
+state_(seed, allocator)
{}
template<typename S, typename A>
diff --git a/tuple/test/CMakeLists.txt b/tuple/test/CMakeLists.txt
index 53a3a7e..90b6713 100644
--- a/tuple/test/CMakeLists.txt
+++ b/tuple/test/CMakeLists.txt
@@ -43,7 +43,9 @@
tuple_union_test.cpp
tuple_intersection_test.cpp
tuple_a_not_b_test.cpp
+ array_of_doubles_sketch_test.cpp
theta_sketch_experimental_test.cpp
theta_union_experimental_test.cpp
- array_of_doubles_sketch_test.cpp
+ theta_intersection_experimental_test.cpp
+ theta_a_not_b_experimental_test.cpp
)
diff --git a/tuple/test/theta_a_not_b_experimental_test.cpp b/tuple/test/theta_a_not_b_experimental_test.cpp
new file mode 100644
index 0000000..6b44f8b
--- /dev/null
+++ b/tuple/test/theta_a_not_b_experimental_test.cpp
@@ -0,0 +1,250 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include <catch.hpp>
+
+#include <theta_a_not_b_experimental.hpp>
+
+namespace datasketches {
+
+// These tests have been copied from the existing theta sketch implementation.
+
+using update_theta_sketch = update_theta_sketch_experimental<>;
+using compact_theta_sketch = compact_theta_sketch_experimental<>;
+using theta_a_not_b = theta_a_not_b_experimental<>;
+
+TEST_CASE("theta a-not-b: empty", "[theta_a_not_b]") {
+ theta_a_not_b a_not_b;
+ auto a = update_theta_sketch::builder().build();
+ auto b = update_theta_sketch::builder().build();
+ compact_theta_sketch result = a_not_b.compute(a, b);
+ REQUIRE(result.get_num_retained() == 0);
+ REQUIRE(result.is_empty());
+ REQUIRE_FALSE(result.is_estimation_mode());
+ REQUIRE(result.get_estimate() == 0.0);
+}
+
+TEST_CASE("theta a-not-b: non empty no retained keys", "[theta_a_not_b]") {
+ update_theta_sketch a = update_theta_sketch::builder().build();
+ a.update(1);
+ update_theta_sketch b = update_theta_sketch::builder().set_p(0.001).build();
+ theta_a_not_b a_not_b;
+
+ // B is still empty
+ compact_theta_sketch result = a_not_b.compute(a, b);
+ REQUIRE_FALSE(result.is_empty());
+ REQUIRE_FALSE(result.is_estimation_mode());
+ REQUIRE(result.get_num_retained() == 1);
+ REQUIRE(result.get_theta() == Approx(1).margin(1e-10));
+ REQUIRE(result.get_estimate() == 1.0);
+
+ // B is not empty in estimation mode and no entries
+ b.update(1);
+ REQUIRE(b.get_num_retained() == 0U);
+
+ result = a_not_b.compute(a, b);
+ REQUIRE_FALSE(result.is_empty());
+ REQUIRE(result.is_estimation_mode());
+ REQUIRE(result.get_num_retained() == 0);
+ REQUIRE(result.get_theta() == Approx(0.001).margin(1e-10));
+ REQUIRE(result.get_estimate() == 0.0);
+}
+
+TEST_CASE("theta a-not-b: exact mode half overlap", "[theta_a_not_b]") {
+ update_theta_sketch a = update_theta_sketch::builder().build();
+ int value = 0;
+ for (int i = 0; i < 1000; i++) a.update(value++);
+
+ update_theta_sketch b = update_theta_sketch::builder().build();
+ value = 500;
+ for (int i = 0; i < 1000; i++) b.update(value++);
+
+ theta_a_not_b a_not_b;
+
+ // unordered inputs, ordered result
+ compact_theta_sketch result = a_not_b.compute(a, b);
+ REQUIRE_FALSE(result.is_empty());
+ REQUIRE_FALSE(result.is_estimation_mode());
+ REQUIRE(result.is_ordered());
+ REQUIRE(result.get_estimate() == 500.0);
+
+ // unordered inputs, unordered result
+ result = a_not_b.compute(a, b, false);
+ REQUIRE_FALSE(result.is_empty());
+ REQUIRE_FALSE(result.is_estimation_mode());
+ REQUIRE_FALSE(result.is_ordered());
+ REQUIRE(result.get_estimate() == 500.0);
+
+ // ordered inputs
+ result = a_not_b.compute(a.compact(), b.compact());
+ REQUIRE_FALSE(result.is_empty());
+ REQUIRE_FALSE(result.is_estimation_mode());
+ REQUIRE(result.is_ordered());
+ REQUIRE(result.get_estimate() == 500.0);
+
+ // A is ordered, so the result is ordered regardless
+ result = a_not_b.compute(a.compact(), b, false);
+ REQUIRE_FALSE(result.is_empty());
+ REQUIRE_FALSE(result.is_estimation_mode());
+ REQUIRE(result.is_ordered());
+ REQUIRE(result.get_estimate() == 500.0);
+}
+
+TEST_CASE("theta a-not-b: exact mode disjoint", "[theta_a_not_b]") {
+ update_theta_sketch a = update_theta_sketch::builder().build();
+ int value = 0;
+ for (int i = 0; i < 1000; i++) a.update(value++);
+
+ update_theta_sketch b = update_theta_sketch::builder().build();
+ for (int i = 0; i < 1000; i++) b.update(value++);
+
+ theta_a_not_b a_not_b;
+
+ // unordered inputs
+ compact_theta_sketch result = a_not_b.compute(a, b);
+ REQUIRE_FALSE(result.is_empty());
+ REQUIRE_FALSE(result.is_estimation_mode());
+ REQUIRE(result.get_estimate() == 1000.0);
+
+ // ordered inputs
+ result = a_not_b.compute(a.compact(), b.compact());
+ REQUIRE_FALSE(result.is_empty());
+ REQUIRE_FALSE(result.is_estimation_mode());
+ REQUIRE(result.get_estimate() == 1000.0);
+}
+
+TEST_CASE("theta a-not-b: exact mode full overlap", "[theta_a_not_b]") {
+ update_theta_sketch sketch = update_theta_sketch::builder().build();
+ int value = 0;
+ for (int i = 0; i < 1000; i++) sketch.update(value++);
+
+ theta_a_not_b a_not_b;
+
+ // unordered inputs
+ compact_theta_sketch result = a_not_b.compute(sketch, sketch);
+ REQUIRE(result.is_empty());
+ REQUIRE_FALSE(result.is_estimation_mode());
+ REQUIRE(result.get_estimate() == 0.0);
+
+ // ordered inputs
+ result = a_not_b.compute(sketch.compact(), sketch.compact());
+ REQUIRE(result.is_empty());
+ REQUIRE_FALSE(result.is_estimation_mode());
+ REQUIRE(result.get_estimate() == 0.0);
+}
+
+TEST_CASE("theta a-not-b: estimation mode half overlap", "[theta_a_not_b]") {
+ update_theta_sketch a = update_theta_sketch::builder().build();
+ int value = 0;
+ for (int i = 0; i < 10000; i++) a.update(value++);
+
+ update_theta_sketch b = update_theta_sketch::builder().build();
+ value = 5000;
+ for (int i = 0; i < 10000; i++) b.update(value++);
+
+ theta_a_not_b a_not_b;
+
+ // unordered inputs
+ compact_theta_sketch result = a_not_b.compute(a, b);
+ REQUIRE_FALSE(result.is_empty());
+ REQUIRE(result.is_estimation_mode());
+ REQUIRE(result.get_estimate() == Approx(5000).margin(5000 * 0.02));
+
+ // ordered inputs
+ result = a_not_b.compute(a.compact(), b.compact());
+ REQUIRE_FALSE(result.is_empty());
+ REQUIRE(result.is_estimation_mode());
+ REQUIRE(result.get_estimate() == Approx(5000).margin(5000 * 0.02));
+}
+
+TEST_CASE("theta a-not-b: estimation mode disjoint", "[theta_a_not_b]") {
+ update_theta_sketch a = update_theta_sketch::builder().build();
+ int value = 0;
+ for (int i = 0; i < 10000; i++) a.update(value++);
+
+ update_theta_sketch b = update_theta_sketch::builder().build();
+ for (int i = 0; i < 10000; i++) b.update(value++);
+
+ theta_a_not_b a_not_b;
+
+ // unordered inputs
+ compact_theta_sketch result = a_not_b.compute(a, b);
+ REQUIRE_FALSE(result.is_empty());
+ REQUIRE(result.is_estimation_mode());
+ REQUIRE(result.get_estimate() == Approx(10000).margin(10000 * 0.02));
+
+ // ordered inputs
+ result = a_not_b.compute(a.compact(), b.compact());
+ REQUIRE_FALSE(result.is_empty());
+ REQUIRE(result.is_estimation_mode());
+ REQUIRE(result.get_estimate() == Approx(10000).margin(10000 * 0.02));
+}
+
+TEST_CASE("theta a-not-b: estimation mode full overlap", "[theta_a_not_b]") {
+ update_theta_sketch sketch = update_theta_sketch::builder().build();
+ int value = 0;
+ for (int i = 0; i < 10000; i++) sketch.update(value++);
+
+ theta_a_not_b a_not_b;
+
+ // unordered inputs
+ compact_theta_sketch result = a_not_b.compute(sketch, sketch);
+ REQUIRE_FALSE(result.is_empty());
+ REQUIRE(result.is_estimation_mode());
+ REQUIRE(result.get_estimate() == 0.0);
+
+ // ordered inputs
+ result = a_not_b.compute(sketch.compact(), sketch.compact());
+ REQUIRE_FALSE(result.is_empty());
+ REQUIRE(result.is_estimation_mode());
+ REQUIRE(result.get_estimate() == 0.0);
+}
+
+TEST_CASE("theta a-not-b: seed mismatch", "[theta_a_not_b]") {
+ update_theta_sketch sketch = update_theta_sketch::builder().build();
+ sketch.update(1); // non-empty should not be ignored
+ theta_a_not_b a_not_b(123);
+ REQUIRE_THROWS_AS(a_not_b.compute(sketch, sketch), std::invalid_argument);
+}
+
+TEST_CASE("theta a-not-b: issue #152", "[theta_a_not_b]") {
+ update_theta_sketch a = update_theta_sketch::builder().build();
+ int value = 0;
+ for (int i = 0; i < 10000; i++) a.update(value++);
+
+ update_theta_sketch b = update_theta_sketch::builder().build();
+ value = 5000;
+ for (int i = 0; i < 25000; i++) b.update(value++);
+
+ theta_a_not_b a_not_b;
+
+ // unordered inputs
+ compact_theta_sketch result = a_not_b.compute(a, b);
+ REQUIRE_FALSE(result.is_empty());
+ REQUIRE(result.is_estimation_mode());
+ REQUIRE(result.get_estimate() == Approx(5000).margin(5000 * 0.03));
+
+ // ordered inputs
+ result = a_not_b.compute(a.compact(), b.compact());
+ REQUIRE_FALSE(result.is_empty());
+ REQUIRE(result.is_estimation_mode());
+ REQUIRE(result.get_estimate() == Approx(5000).margin(5000 * 0.03));
+}
+
+} /* namespace datasketches */
diff --git a/tuple/test/theta_intersection_experimental_test.cpp b/tuple/test/theta_intersection_experimental_test.cpp
new file mode 100644
index 0000000..3337636
--- /dev/null
+++ b/tuple/test/theta_intersection_experimental_test.cpp
@@ -0,0 +1,224 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include <catch.hpp>
+
+#include <theta_intersection_experimental.hpp>
+
+namespace datasketches {
+
+// These tests have been copied from the existing theta sketch implementation.
+
+using update_theta_sketch = update_theta_sketch_experimental<>;
+using compact_theta_sketch = compact_theta_sketch_experimental<>;
+using theta_intersection = theta_intersection_experimental<>;
+
+TEST_CASE("theta intersection: invalid", "[theta_intersection]") {
+ theta_intersection intersection;
+ REQUIRE_FALSE(intersection.has_result());
+ REQUIRE_THROWS_AS(intersection.get_result(), std::invalid_argument);
+}
+
+TEST_CASE("theta intersection: empty", "[theta_intersection]") {
+ theta_intersection intersection;
+ update_theta_sketch sketch = update_theta_sketch::builder().build();
+ intersection.update(sketch);
+ compact_theta_sketch result = intersection.get_result();
+ REQUIRE(result.get_num_retained() == 0);
+ REQUIRE(result.is_empty());
+ REQUIRE_FALSE(result.is_estimation_mode());
+ REQUIRE(result.get_estimate() == 0.0);
+
+ intersection.update(sketch);
+ result = intersection.get_result();
+ REQUIRE(result.get_num_retained() == 0);
+ REQUIRE(result.is_empty());
+ REQUIRE_FALSE(result.is_estimation_mode());
+ REQUIRE(result.get_estimate() == 0.0);
+}
+
+TEST_CASE("theta intersection: non empty no retained keys", "[theta_intersection]") {
+ update_theta_sketch sketch = update_theta_sketch::builder().set_p(0.001).build();
+ sketch.update(1);
+ theta_intersection intersection;
+ intersection.update(sketch);
+ compact_theta_sketch result = intersection.get_result();
+ REQUIRE(result.get_num_retained() == 0);
+ REQUIRE_FALSE(result.is_empty());
+ REQUIRE(result.is_estimation_mode());
+ REQUIRE(result.get_theta() == Approx(0.001).margin(1e-10));
+ REQUIRE(result.get_estimate() == 0.0);
+
+ intersection.update(sketch);
+ result = intersection.get_result();
+ REQUIRE(result.get_num_retained() == 0);
+ REQUIRE_FALSE(result.is_empty());
+ REQUIRE(result.is_estimation_mode());
+ REQUIRE(result.get_theta() == Approx(0.001).margin(1e-10));
+ REQUIRE(result.get_estimate() == 0.0);
+}
+
+TEST_CASE("theta intersection: exact mode half overlap unordered", "[theta_intersection]") {
+ update_theta_sketch sketch1 = update_theta_sketch::builder().build();
+ int value = 0;
+ for (int i = 0; i < 1000; i++) sketch1.update(value++);
+
+ update_theta_sketch sketch2 = update_theta_sketch::builder().build();
+ value = 500;
+ for (int i = 0; i < 1000; i++) sketch2.update(value++);
+
+ theta_intersection intersection;
+ intersection.update(sketch1);
+ intersection.update(sketch2);
+ compact_theta_sketch result = intersection.get_result();
+ REQUIRE_FALSE(result.is_empty());
+ REQUIRE_FALSE(result.is_estimation_mode());
+ REQUIRE(result.get_estimate() == 500.0);
+}
+
+TEST_CASE("theta intersection: exact mode half overlap ordered", "[theta_intersection]") {
+ update_theta_sketch sketch1 = update_theta_sketch::builder().build();
+ int value = 0;
+ for (int i = 0; i < 1000; i++) sketch1.update(value++);
+
+ update_theta_sketch sketch2 = update_theta_sketch::builder().build();
+ value = 500;
+ for (int i = 0; i < 1000; i++) sketch2.update(value++);
+
+ theta_intersection intersection;
+ intersection.update(sketch1.compact());
+ intersection.update(sketch2.compact());
+ compact_theta_sketch result = intersection.get_result();
+ REQUIRE_FALSE(result.is_empty());
+ REQUIRE_FALSE(result.is_estimation_mode());
+ REQUIRE(result.get_estimate() == 500.0);
+}
+
+TEST_CASE("theta intersection: exact mode disjoint unordered", "[theta_intersection]") {
+ update_theta_sketch sketch1 = update_theta_sketch::builder().build();
+ int value = 0;
+ for (int i = 0; i < 1000; i++) sketch1.update(value++);
+
+ update_theta_sketch sketch2 = update_theta_sketch::builder().build();
+ for (int i = 0; i < 1000; i++) sketch2.update(value++);
+
+ theta_intersection intersection;
+ intersection.update(sketch1);
+ intersection.update(sketch2);
+ compact_theta_sketch result = intersection.get_result();
+ REQUIRE(result.is_empty());
+ REQUIRE_FALSE(result.is_estimation_mode());
+ REQUIRE(result.get_estimate() == 0.0);
+}
+
+TEST_CASE("theta intersection: exact mode disjoint ordered", "[theta_intersection]") {
+ update_theta_sketch sketch1 = update_theta_sketch::builder().build();
+ int value = 0;
+ for (int i = 0; i < 1000; i++) sketch1.update(value++);
+
+ update_theta_sketch sketch2 = update_theta_sketch::builder().build();
+ for (int i = 0; i < 1000; i++) sketch2.update(value++);
+
+ theta_intersection intersection;
+ intersection.update(sketch1.compact());
+ intersection.update(sketch2.compact());
+ compact_theta_sketch result = intersection.get_result();
+ REQUIRE(result.is_empty());
+ REQUIRE_FALSE(result.is_estimation_mode());
+ REQUIRE(result.get_estimate() == 0.0);
+}
+
+TEST_CASE("theta intersection: estimation mode half overlap unordered", "[theta_intersection]") {
+ update_theta_sketch sketch1 = update_theta_sketch::builder().build();
+ int value = 0;
+ for (int i = 0; i < 10000; i++) sketch1.update(value++);
+
+ update_theta_sketch sketch2 = update_theta_sketch::builder().build();
+ value = 5000;
+ for (int i = 0; i < 10000; i++) sketch2.update(value++);
+
+ theta_intersection intersection;
+ intersection.update(sketch1);
+ intersection.update(sketch2);
+ compact_theta_sketch result = intersection.get_result();
+ REQUIRE_FALSE(result.is_empty());
+ REQUIRE(result.is_estimation_mode());
+ REQUIRE(result.get_estimate() == Approx(5000).margin(5000 * 0.02));
+}
+
+TEST_CASE("theta intersection: estimation mode half overlap ordered", "[theta_intersection]") {
+ update_theta_sketch sketch1 = update_theta_sketch::builder().build();
+ int value = 0;
+ for (int i = 0; i < 10000; i++) sketch1.update(value++);
+
+ update_theta_sketch sketch2 = update_theta_sketch::builder().build();
+ value = 5000;
+ for (int i = 0; i < 10000; i++) sketch2.update(value++);
+
+ theta_intersection intersection;
+ intersection.update(sketch1.compact());
+ intersection.update(sketch2.compact());
+ compact_theta_sketch result = intersection.get_result();
+ REQUIRE_FALSE(result.is_empty());
+ REQUIRE(result.is_estimation_mode());
+ REQUIRE(result.get_estimate() == Approx(5000).margin(5000 * 0.02));
+}
+
+TEST_CASE("theta intersection: estimation mode disjoint unordered", "[theta_intersection]") {
+ update_theta_sketch sketch1 = update_theta_sketch::builder().build();
+ int value = 0;
+ for (int i = 0; i < 10000; i++) sketch1.update(value++);
+
+ update_theta_sketch sketch2 = update_theta_sketch::builder().build();
+ for (int i = 0; i < 10000; i++) sketch2.update(value++);
+
+ theta_intersection intersection;
+ intersection.update(sketch1);
+ intersection.update(sketch2);
+ compact_theta_sketch result = intersection.get_result();
+ REQUIRE_FALSE(result.is_empty());
+ REQUIRE(result.is_estimation_mode());
+ REQUIRE(result.get_estimate() == 0.0);
+}
+
+TEST_CASE("theta intersection: estimation mode disjoint ordered", "[theta_intersection]") {
+ update_theta_sketch sketch1 = update_theta_sketch::builder().build();
+ int value = 0;
+ for (int i = 0; i < 10000; i++) sketch1.update(value++);
+
+ update_theta_sketch sketch2 = update_theta_sketch::builder().build();
+ for (int i = 0; i < 10000; i++) sketch2.update(value++);
+
+ theta_intersection intersection;
+ intersection.update(sketch1.compact());
+ intersection.update(sketch2.compact());
+ compact_theta_sketch result = intersection.get_result();
+ REQUIRE_FALSE(result.is_empty());
+ REQUIRE(result.is_estimation_mode());
+ REQUIRE(result.get_estimate() == 0.0);
+}
+
+TEST_CASE("theta intersection: seed mismatch", "[theta_intersection]") {
+ update_theta_sketch sketch = update_theta_sketch::builder().build();
+ sketch.update(1); // non-empty should not be ignored
+ theta_intersection intersection(123);
+ REQUIRE_THROWS_AS(intersection.update(sketch), std::invalid_argument);
+}
+
+} /* namespace datasketches */
diff --git a/tuple/test/theta_sketch_experimental_test.cpp b/tuple/test/theta_sketch_experimental_test.cpp
index e0ccdf3..61435a4 100644
--- a/tuple/test/theta_sketch_experimental_test.cpp
+++ b/tuple/test/theta_sketch_experimental_test.cpp
@@ -31,6 +31,9 @@
const std::string inputPath = "test/";
#endif
+// These tests have been copied from the existing theta sketch implementation.
+// Serialization as base class and serialization of update sketch have been removed.
+
using update_theta_sketch = update_theta_sketch_experimental<>;
using compact_theta_sketch = compact_theta_sketch_experimental<>;