blob: cd627cfe39e522ac9709f9436300aa28e26e0daf [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <parallel_hashmap/phmap.h>
#include <boost/noncopyable.hpp>
#include <span>
#include "vec/common/hash_table/hash.h"
#include "vec/common/hash_table/phmap_fwd_decl.h"
template <typename Key, typename Mapped>
ALWAYS_INLINE inline auto lookup_result_get_mapped(std::pair<const Key, Mapped>* it) {
return &(it->second);
}
template <typename Key, typename Mapped, typename HashMethod = DefaultHash<Key>>
class PHHashMap : private boost::noncopyable {
public:
using Self = PHHashMap;
using Hash = HashMethod;
using cell_type = std::pair<const Key, Mapped>;
using HashMapImpl = doris::vectorized::flat_hash_map<Key, Mapped, Hash>;
using key_type = Key;
using mapped_type = Mapped;
using Value = Mapped;
using value_type = std::pair<const Key, Mapped>;
using LookupResult = std::pair<const Key, Mapped>*;
using ConstLookupResult = const std::pair<const Key, Mapped>*;
using const_iterator_impl = typename HashMapImpl::const_iterator;
using iterator_impl = typename HashMapImpl::iterator;
PHHashMap() = default;
PHHashMap(size_t reserve_for_num_elements) { _hash_map.reserve(reserve_for_num_elements); }
PHHashMap(PHHashMap&& other) { *this = std::move(other); }
PHHashMap& operator=(PHHashMap&& rhs) {
_hash_map.clear();
_hash_map = std::move(rhs._hash_map);
return *this;
}
template <typename Derived, bool is_const>
class iterator_base {
using BaseIterator = std::conditional_t<is_const, const_iterator_impl, iterator_impl>;
BaseIterator base_iterator;
friend class PHHashMap;
public:
iterator_base() {}
iterator_base(BaseIterator it) : base_iterator(it) {}
bool operator==(const iterator_base& rhs) const {
return base_iterator == rhs.base_iterator;
}
bool operator!=(const iterator_base& rhs) const {
return base_iterator != rhs.base_iterator;
}
Derived& operator++() {
base_iterator++;
return static_cast<Derived&>(*this);
}
auto& operator*() const { return *this; }
auto* operator->() const { return this; }
auto& operator*() { return *this; }
auto* operator->() { return this; }
const auto& get_first() const { return base_iterator->first; }
const auto& get_second() const { return base_iterator->second; }
auto& get_second() { return base_iterator->second; }
auto get_ptr() const { return this; }
size_t get_hash() const { return base_iterator->get_hash(); }
};
class iterator : public iterator_base<iterator, false> {
public:
using iterator_base<iterator, false>::iterator_base;
};
class const_iterator : public iterator_base<const_iterator, true> {
public:
using iterator_base<const_iterator, true>::iterator_base;
};
const_iterator begin() const { return const_iterator(_hash_map.cbegin()); }
const_iterator cbegin() const { return const_iterator(_hash_map.cbegin()); }
iterator begin() { return iterator(_hash_map.begin()); }
const_iterator end() const { return const_iterator(_hash_map.cend()); }
const_iterator cend() const { return const_iterator(_hash_map.cend()); }
iterator end() { return iterator(_hash_map.end()); }
template <typename KeyHolder>
void ALWAYS_INLINE emplace(KeyHolder&& key_holder, LookupResult& it, bool& inserted) {
inserted = false;
it = &*_hash_map.lazy_emplace(key_holder, [&](const auto& ctor) {
inserted = true;
ctor(key_holder, nullptr);
});
}
template <typename KeyHolder, typename Func>
void ALWAYS_INLINE lazy_emplace(KeyHolder&& key_holder, LookupResult& it, Func&& f) {
it = &*_hash_map.lazy_emplace(key_holder, [&](const auto& ctor) { f(ctor, key_holder); });
}
template <typename KeyHolder>
void ALWAYS_INLINE emplace(KeyHolder&& key, LookupResult& it, bool& inserted,
size_t hash_value) {
inserted = false;
it = &*_hash_map.lazy_emplace_with_hash(key, hash_value, [&](const auto& ctor) {
inserted = true;
if constexpr (std::is_pointer_v<std::remove_reference_t<mapped_type>>) {
ctor(key, nullptr);
} else {
ctor(key, mapped_type());
}
});
}
template <typename KeyHolder, typename Func>
void ALWAYS_INLINE lazy_emplace(KeyHolder&& key, LookupResult& it, size_t hash_value,
Func&& f) {
it = &*_hash_map.lazy_emplace_with_hash(key, hash_value,
[&](const auto& ctor) { f(ctor, key, key); });
}
void ALWAYS_INLINE insert(const Key& key, const Mapped& value) {
_hash_map.lazy_emplace(key, [&](const auto& ctor) { ctor(key, value); });
}
void insert(const iterator& other_iter) {
insert(other_iter->get_first(), other_iter->get_second());
}
template <typename KeyHolder>
LookupResult ALWAYS_INLINE find(KeyHolder&& key) {
auto it = _hash_map.find(key);
return it != _hash_map.end() ? &*it : nullptr;
}
template <typename KeyHolder>
LookupResult ALWAYS_INLINE find(KeyHolder&& key, size_t hash_value) {
auto it = _hash_map.find(key, hash_value);
return it != _hash_map.end() ? &*it : nullptr;
}
size_t hash(const Key& x) const { return _hash_map.hash(x); }
template <bool read>
void ALWAYS_INLINE prefetch(const Key& key, size_t hash_value) {
_hash_map.prefetch_hash(hash_value);
}
/// Call func(Mapped &) for each hash map element.
template <typename Func>
void for_each_mapped(Func&& func) {
for (auto& v : *this) func(v.get_second());
}
size_t get_buffer_size_in_bytes() const {
const auto capacity = _hash_map.capacity();
return capacity * sizeof(typename HashMapImpl::slot_type);
}
bool add_elem_size_overflow(size_t row) const {
const auto capacity = _hash_map.capacity();
// phmap use 7/8th as maximum load factor.
return (_hash_map.size() + row) > (capacity * 7 / 8);
}
size_t estimate_memory(size_t num_elem) const {
if (!add_elem_size_overflow(num_elem)) {
return 0;
}
auto new_size = _hash_map.capacity() * 2 + 1;
return phmap::priv::hashtable_debug_internal::HashtableDebugAccess<
HashMapImpl>::LowerBoundAllocatedByteSize(new_size);
}
size_t size() const { return _hash_map.size(); }
template <typename MappedType>
char* get_null_key_data() {
return nullptr;
}
bool has_null_key_data() const { return false; }
bool empty() const { return _hash_map.empty(); }
void clear_and_shrink() { _hash_map.clear(); }
void reserve(size_t num_elem) { _hash_map.reserve(num_elem); }
private:
HashMapImpl _hash_map;
};