tuple union moving update
diff --git a/tuple/include/theta_union_base.hpp b/tuple/include/theta_union_base.hpp
index 0b9a1b8..fbe4cf7 100644
--- a/tuple/include/theta_union_base.hpp
+++ b/tuple/include/theta_union_base.hpp
@@ -40,7 +40,9 @@
theta_union_base(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, float p, uint64_t seed, const Policy& policy);
+ // TODO: try to avoid duplication
void update(const Sketch& sketch);
+ void update(Sketch&& sketch);
CompactSketch get_result(bool ordered = true) const;
diff --git a/tuple/include/theta_union_base_impl.hpp b/tuple/include/theta_union_base_impl.hpp
index 10e4478..63c0f0f 100644
--- a/tuple/include/theta_union_base_impl.hpp
+++ b/tuple/include/theta_union_base_impl.hpp
@@ -41,7 +41,29 @@
if (!result.second) {
table_.insert(result.first, entry);
} else {
- *result.first = policy_(*result.first, entry);
+ policy_(*result.first, entry);
+ }
+ } else {
+ if (sketch.is_ordered()) break; // early stop
+ }
+ }
+ if (table_.theta_ < union_theta_) union_theta_ = table_.theta_;
+}
+
+template<typename EN, typename EK, typename P, typename S, typename CS, typename A>
+void theta_union_base<EN, EK, P, S, CS, A>::update(S&& sketch) {
+ if (sketch.is_empty()) return;
+ if (sketch.get_seed_hash() != compute_seed_hash(table_.seed_)) throw std::invalid_argument("seed hash mismatch");
+ table_.is_empty_ = false;
+ if (sketch.get_theta64() < union_theta_) union_theta_ = sketch.get_theta64();
+ for (auto& entry: sketch) {
+ const uint64_t hash = EK()(entry);
+ if (hash < union_theta_) {
+ auto result = table_.find(hash);
+ if (!result.second) {
+ table_.insert(result.first, std::move(entry));
+ } else {
+ policy_(*result.first, std::move(entry));
}
} else {
if (sketch.is_ordered()) break; // early stop
diff --git a/tuple/include/theta_update_sketch_base.hpp b/tuple/include/theta_update_sketch_base.hpp
index 6c1e441..1356c21 100644
--- a/tuple/include/theta_update_sketch_base.hpp
+++ b/tuple/include/theta_update_sketch_base.hpp
@@ -200,7 +200,23 @@
return hashes.h1;
}
-// iterator
+// iterators
+
+template<typename Entry, typename ExtractKey>
+class theta_iterator: public std::iterator<std::input_iterator_tag, Entry> {
+public:
+ theta_iterator(Entry* entries, uint32_t size, uint32_t index);
+ theta_iterator& operator++();
+ theta_iterator operator++(int);
+ bool operator==(const theta_iterator& other) const;
+ bool operator!=(const theta_iterator& other) const;
+ Entry& operator*() const;
+
+private:
+ Entry* entries_;
+ uint32_t size_;
+ uint32_t index_;
+};
template<typename Entry, typename ExtractKey>
class theta_const_iterator: public std::iterator<std::input_iterator_tag, Entry> {
diff --git a/tuple/include/theta_update_sketch_base_impl.hpp b/tuple/include/theta_update_sketch_base_impl.hpp
index 60d4084..78b4cc9 100644
--- a/tuple/include/theta_update_sketch_base_impl.hpp
+++ b/tuple/include/theta_update_sketch_base_impl.hpp
@@ -228,6 +228,43 @@
// iterator
template<typename Entry, typename ExtractKey>
+theta_iterator<Entry, ExtractKey>::theta_iterator(Entry* entries, uint32_t size, uint32_t index):
+entries_(entries), size_(size), index_(index) {
+ while (index_ < size_ && ExtractKey()(entries_[index_]) == 0) ++index_;
+}
+
+template<typename Entry, typename ExtractKey>
+auto theta_iterator<Entry, ExtractKey>::operator++() -> theta_iterator& {
+ ++index_;
+ while (index_ < size_ && ExtractKey()(entries_[index_]) == 0) ++index_;
+ return *this;
+}
+
+template<typename Entry, typename ExtractKey>
+auto theta_iterator<Entry, ExtractKey>::operator++(int) -> theta_iterator {
+ theta_iterator tmp(*this);
+ operator++();
+ return tmp;
+}
+
+template<typename Entry, typename ExtractKey>
+bool theta_iterator<Entry, ExtractKey>::operator!=(const theta_iterator& other) const {
+ return index_ != other.index_;
+}
+
+template<typename Entry, typename ExtractKey>
+bool theta_iterator<Entry, ExtractKey>::operator==(const theta_iterator& other) const {
+ return index_ == other.index_;
+}
+
+template<typename Entry, typename ExtractKey>
+auto theta_iterator<Entry, ExtractKey>::operator*() const -> Entry& {
+ return entries_[index_];
+}
+
+// const iterator
+
+template<typename Entry, typename ExtractKey>
theta_const_iterator<Entry, ExtractKey>::theta_const_iterator(const Entry* entries, uint32_t size, uint32_t index):
entries_(entries), size_(size), index_(index) {
while (index_ < size_ && ExtractKey()(entries_[index_]) == 0) ++index_;
diff --git a/tuple/include/tuple_sketch.hpp b/tuple/include/tuple_sketch.hpp
index 5ecb044..3409557 100644
--- a/tuple/include/tuple_sketch.hpp
+++ b/tuple/include/tuple_sketch.hpp
@@ -40,6 +40,7 @@
public:
using Entry = std::pair<uint64_t, Summary>;
using ExtractKey = pair_extract_key<uint64_t, Summary>;
+ using iterator = theta_iterator<Entry, ExtractKey>;
using const_iterator = theta_const_iterator<Entry, ExtractKey>;
virtual ~tuple_sketch() = default;
@@ -109,13 +110,26 @@
* Iterator over entries in this sketch.
* @return begin iterator
*/
- virtual const_iterator begin() const = 0;
+ virtual iterator begin() = 0;
/**
* Iterator pointing past the valid range.
* Not to be incremented or dereferenced.
* @return end iterator
*/
+ virtual iterator end() = 0;
+
+ /**
+ * Const iterator over entries in this sketch.
+ * @return begin const iterator
+ */
+ virtual const_iterator begin() const = 0;
+
+ /**
+ * Const iterator pointing past the valid range.
+ * Not to be incremented or dereferenced.
+ * @return end const iterator
+ */
virtual const_iterator end() const = 0;
protected:
@@ -152,6 +166,7 @@
using Base = tuple_sketch<Summary, Allocator>;
using Entry = typename Base::Entry;
using ExtractKey = typename Base::ExtractKey;
+ using iterator = typename Base::iterator;
using const_iterator = typename Base::const_iterator;
using AllocEntry = typename std::allocator_traits<Allocator>::template rebind_alloc<Entry>;
using tuple_map = theta_update_sketch_base<Entry, ExtractKey, AllocEntry>;
@@ -294,6 +309,8 @@
*/
compact_tuple_sketch<Summary, Allocator> compact(bool ordered = true) const;
+ virtual iterator begin();
+ virtual iterator end();
virtual const_iterator begin() const;
virtual const_iterator end() const;
@@ -316,6 +333,7 @@
using Base = tuple_sketch<Summary, Allocator>;
using Entry = typename Base::Entry;
using ExtractKey = typename Base::ExtractKey;
+ using iterator = typename Base::iterator;
using const_iterator = typename Base::const_iterator;
using AllocEntry = typename std::allocator_traits<Allocator>::template rebind_alloc<Entry>;
using AllocU64 = typename std::allocator_traits<Allocator>::template rebind_alloc<uint64_t>;
@@ -347,6 +365,8 @@
template<typename SerDe = serde<Summary>>
vector_bytes serialize(unsigned header_size_bytes = 0, const SerDe& sd = SerDe()) const;
+ virtual iterator begin();
+ virtual iterator end();
virtual const_iterator begin() const;
virtual const_iterator end() const;
diff --git a/tuple/include/tuple_sketch_impl.hpp b/tuple/include/tuple_sketch_impl.hpp
index 2ae9f0b..ae09cfe 100644
--- a/tuple/include/tuple_sketch_impl.hpp
+++ b/tuple/include/tuple_sketch_impl.hpp
@@ -199,6 +199,16 @@
}
template<typename S, typename U, typename P, typename A>
+auto update_tuple_sketch<S, U, P, A>::begin() -> iterator {
+ return iterator(map_.entries_, 1 << map_.lg_cur_size_, 0);
+}
+
+template<typename S, typename U, typename P, typename A>
+auto update_tuple_sketch<S, U, P, A>::end() -> iterator {
+ return iterator(nullptr, 0, 1 << map_.lg_cur_size_);
+}
+
+template<typename S, typename U, typename P, typename A>
auto update_tuple_sketch<S, U, P, A>::begin() const -> const_iterator {
return const_iterator(map_.entries_, 1 << map_.lg_cur_size_, 0);
}
@@ -426,6 +436,16 @@
}
template<typename S, typename A>
+auto compact_tuple_sketch<S, A>::begin() -> iterator {
+ return iterator(entries_.data(), entries_.size(), 0);
+}
+
+template<typename S, typename A>
+auto compact_tuple_sketch<S, A>::end() -> iterator {
+ return iterator(nullptr, 0, entries_.size());
+}
+
+template<typename S, typename A>
auto compact_tuple_sketch<S, A>::begin() const -> const_iterator {
return const_iterator(entries_.data(), entries_.size(), 0);
}
diff --git a/tuple/include/tuple_union.hpp b/tuple/include/tuple_union.hpp
index 1e7fbb3..0ee776c 100644
--- a/tuple/include/tuple_union.hpp
+++ b/tuple/include/tuple_union.hpp
@@ -51,9 +51,11 @@
// in terms of operations on Entry
struct internal_policy {
internal_policy(const Policy& policy): policy_(policy) {}
- Entry& operator()(Entry& internal_entry, const Entry& incoming_entry) const {
+ void operator()(Entry& internal_entry, const Entry& incoming_entry) const {
policy_(internal_entry.second, incoming_entry.second);
- return internal_entry;
+ }
+ void operator()(Entry& internal_entry, Entry&& incoming_entry) const {
+ policy_(internal_entry.second, std::move(incoming_entry.second));
}
Policy policy_;
};
@@ -67,7 +69,8 @@
* This method is to update the union with a given sketch
* @param sketch to update the union with
*/
- void update(const Sketch& sketch);
+ template<typename FwdSketch>
+ void update(FwdSketch&& sketch);
/**
* This method produces a copy of the current state of the union as a compact sketch.
diff --git a/tuple/include/tuple_union_impl.hpp b/tuple/include/tuple_union_impl.hpp
index aa45a31..9f471ac 100644
--- a/tuple/include/tuple_union_impl.hpp
+++ b/tuple/include/tuple_union_impl.hpp
@@ -25,8 +25,9 @@
{}
template<typename S, typename P, typename A>
-void tuple_union<S, P, A>::update(const Sketch& sketch) {
- state_.update(sketch);
+template<typename SS>
+void tuple_union<S, P, A>::update(SS&& sketch) {
+ state_.update(std::forward<SS>(sketch));
}
template<typename S, typename P, typename A>