blob: 8c30218755c53d0b95218c588afbea5bafccde03 [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <boost/core/noncopyable.hpp>
#include <cstdint>
#include "vec/common/hash_table/hash.h"
#include "vec/common/hash_table/phmap_fwd_decl.h"
ALWAYS_INLINE inline void* lookup_result_get_mapped(void*) {
return nullptr;
}
template <typename Key, typename HashMethod = DefaultHash<Key>>
class PHHashSet : private boost::noncopyable {
public:
using Self = PHHashSet;
using Hash = HashMethod;
using HashSetImpl = doris::vectorized::flat_hash_set<Key, Hash>;
using key_type = Key;
using mapped_type = void;
using value_type = void;
using Value = void*;
using LookupResult = void*;
PHHashSet() = default;
PHHashSet(size_t reserve_for_num_elements) { _hash_set.reserve(reserve_for_num_elements); }
PHHashSet(PHHashSet&& other) { *this = std::move(other); }
PHHashSet& operator=(PHHashSet&& rhs) {
_hash_set.clear();
_hash_set = std::move(rhs._hash_set);
return *this;
}
size_t hash(const Key& x) const { return _hash_set.hash(x); }
template <typename KeyHolder, typename Func>
void ALWAYS_INLINE lazy_emplace(KeyHolder&& key_holder, LookupResult& it, Func&& f) {
_hash_set.lazy_emplace(key_holder, [&](const auto& ctor) { f(ctor, key_holder); });
}
template <typename KeyHolder, typename Func>
void ALWAYS_INLINE lazy_emplace(KeyHolder&& key, LookupResult& it, size_t hash_value,
Func&& f) {
_hash_set.lazy_emplace_with_hash(key, hash_value,
[&](const auto& ctor) { f(ctor, key, key); });
}
template <bool read>
void ALWAYS_INLINE prefetch(const Key& key, size_t hash_value) {
_hash_set.prefetch_hash(hash_value);
}
/// Call func(Mapped &) for each hash map element.
template <typename Func>
void for_each_mapped(Func&& func) {
for (auto& v : *this) {
func(v.get_second());
}
}
size_t get_buffer_size_in_bytes() const {
const auto capacity = _hash_set.capacity();
return capacity * sizeof(typename HashSetImpl::slot_type);
}
size_t get_buffer_size_in_cells() const { return _hash_set.capacity(); }
bool add_elem_size_overflow(size_t row) const {
const auto capacity = _hash_set.capacity();
// phmap use 7/8th as maximum load factor.
return (_hash_set.size() + row) > (capacity * 7 / 8);
}
size_t size() const { return _hash_set.size(); }
template <typename MappedType>
void* get_null_key_data() {
return nullptr;
}
bool has_null_key_data() const { return false; }
bool empty() const { return _hash_set.empty(); }
void clear_and_shrink() { _hash_set.clear(); }
void reserve(size_t num_elem) { _hash_set.reserve(num_elem); }
private:
HashSetImpl _hash_set;
};
//use to small fixed size key ,for example: int8_t, int16_t
template <typename KeyType>
class SmallFixedSizeHashSet {
public:
static_assert(std::is_integral_v<KeyType>);
static_assert(sizeof(KeyType) <= 2);
using key_type = KeyType;
using mapped_type = void;
using value_type = void;
using Value = void*;
using LookupResult = void*;
using search_key_type = std::make_unsigned_t<KeyType>;
using SetType = bool;
static constexpr SetType set_flag = true;
static constexpr SetType not_set_flag = false;
static constexpr int hash_table_size = 1 << sizeof(KeyType) * 8;
static_assert(sizeof(SetType) == 1);
SmallFixedSizeHashSet() { memset(_hash_table, not_set_flag, hash_table_size); }
SmallFixedSizeHashSet(size_t reserve_for_num_elements) {}
SmallFixedSizeHashSet(SmallFixedSizeHashSet&& other) { *this = std::move(other); }
SmallFixedSizeHashSet& operator=(SmallFixedSizeHashSet&& rhs) {
_size = rhs._size;
memcpy(_hash_table, rhs._hash_table, hash_table_size);
return *this;
}
size_t hash(const KeyType& x) const { return static_cast<search_key_type>(x); }
template <typename KeyHolder, typename Func>
void ALWAYS_INLINE lazy_emplace(KeyHolder&& key_holder, LookupResult& it, Func&& f) {
DCHECK(0 <= static_cast<search_key_type>(key_holder) &&
static_cast<search_key_type>(key_holder) < hash_table_size);
if (_hash_table[static_cast<search_key_type>(key_holder)] == not_set_flag) {
auto ctor = [&](auto& key_value) {
_size++;
_hash_table[static_cast<search_key_type>(key_value)] = set_flag;
};
f(ctor, key_holder);
}
}
template <typename KeyHolder, typename Func>
void ALWAYS_INLINE lazy_emplace(KeyHolder&& key, LookupResult& it, size_t hash_value,
Func&& f) {
DCHECK(0 <= static_cast<search_key_type>(key) &&
static_cast<search_key_type>(key) < hash_table_size);
if (_hash_table[static_cast<search_key_type>(key)] == not_set_flag) {
auto ctor = [&](auto& key_value) {
_size++;
_hash_table[static_cast<search_key_type>(key_value)] = set_flag;
};
f(ctor, key, key);
}
}
template <bool read>
void ALWAYS_INLINE prefetch(const KeyType& key, size_t hash_value) {}
/// Call func(Mapped &) for each hash map element.
template <typename Func>
void for_each_mapped(Func&& func) {
for (int i = 0; i < hash_table_size; i++) {
if (_hash_table[i] == set_flag) {
func(i);
}
}
}
size_t get_buffer_size_in_bytes() const { return hash_table_size; }
size_t get_buffer_size_in_cells() const { return hash_table_size; }
bool add_elem_size_overflow(size_t row) const { return false; }
size_t size() const { return _size; }
template <typename MappedType>
void* get_null_key_data() {
return nullptr;
}
bool has_null_key_data() const { return false; }
bool empty() const { return _size == 0; }
void clear_and_shrink() {}
void reserve(size_t num_elem) {}
private:
size_t _size = 0;
uint8_t _hash_table[hash_table_size];
};