blob: e3e7bfbd6ce332a2ad1d59bb832eccb8e7e33bbb [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
#ifndef REVERSE_PURGE_HASH_MAP_HPP_
#define REVERSE_PURGE_HASH_MAP_HPP_
#include <memory>
#include <iterator>
namespace datasketches {
/*
* This is a specialized linear-probing hash map with a reverse purge operation
* that removes all entries in the map with values that are less than zero.
* Based on Java implementation here:
* https://github.com/DataSketches/sketches-core/blob/master/src/main/java/com/yahoo/sketches/frequencies/ReversePurgeItemHashMap.java
* author Alexander Saydakov
*/
template<typename K, typename V = uint64_t, typename H = std::hash<K>, typename E = std::equal_to<K>, typename A = std::allocator<K>>
class reverse_purge_hash_map {
public:
using AllocV = typename std::allocator_traits<A>::template rebind_alloc<V>;
using AllocU16 = typename std::allocator_traits<A>::template rebind_alloc<uint16_t>;
reverse_purge_hash_map(uint8_t lg_size, uint8_t lg_max_size);
reverse_purge_hash_map(const reverse_purge_hash_map& other);
reverse_purge_hash_map(reverse_purge_hash_map&& other) noexcept;
~reverse_purge_hash_map();
reverse_purge_hash_map& operator=(reverse_purge_hash_map other);
reverse_purge_hash_map& operator=(reverse_purge_hash_map&& other);
V adjust_or_insert(const K& key, V value);
V adjust_or_insert(K&& key, V value);
V get(const K& key) const;
uint8_t get_lg_cur_size() const;
uint8_t get_lg_max_size() const;
uint32_t get_capacity() const;
uint32_t get_num_active() const;
class iterator;
iterator begin() const;
iterator end() const;
private:
static constexpr double LOAD_FACTOR = 0.75;
static constexpr uint16_t DRIFT_LIMIT = 1024; // used only for stress testing
static constexpr uint32_t MAX_SAMPLE_SIZE = 1024; // number of samples to compute approximate median during purge
uint8_t lg_cur_size;
uint8_t lg_max_size;
uint32_t num_active;
K* keys;
V* values;
uint16_t* states;
inline bool is_active(uint32_t probe) const;
void subtract_and_keep_positive_only(V amount);
void hash_delete(uint32_t probe);
uint32_t internal_adjust_or_insert(const K& key, V value);
V resize_or_purge_if_needed();
void resize(uint8_t lg_new_size);
V purge();
};
// This iterator uses strides based on golden ratio to avoid clustering during merge
template<typename K, typename V, typename H, typename E, typename A>
class reverse_purge_hash_map<K, V, H, E, A>::iterator: public std::iterator<std::input_iterator_tag, K> {
public:
friend class reverse_purge_hash_map<K, V, H, E, A>;
iterator& operator++() {
++count;
if (count < map->num_active) {
const uint32_t mask = (1 << map->lg_cur_size) - 1;
do {
index = (index + stride) & mask;
} while (!map->is_active(index));
}
return *this;
}
iterator operator++(int) { iterator tmp(*this); operator++(); return tmp; }
bool operator==(const iterator& rhs) const { return count == rhs.count; }
bool operator!=(const iterator& rhs) const { return count != rhs.count; }
const std::pair<K&, V> operator*() const {
return std::pair<K&, V>(map->keys[index], map->values[index]);
}
private:
static constexpr double GOLDEN_RATIO_RECIPROCAL = 0.6180339887498949; // = (sqrt(5) - 1) / 2
const reverse_purge_hash_map<K, V, H, E, A>* map;
uint32_t index;
uint32_t count;
uint32_t stride;
iterator(const reverse_purge_hash_map<K, V, H, E, A>* map, uint32_t index, uint32_t count):
map(map), index(index), count(count), stride(static_cast<uint32_t>((1 << map->lg_cur_size) * GOLDEN_RATIO_RECIPROCAL) | 1) {}
};
} /* namespace datasketches */
#include "reverse_purge_hash_map_impl.hpp"
#endif