blob: a11ec17ec7052761ffa1c1f068900834a9a63063 [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
// This file is copied from
// https://github.com/ClickHouse/ClickHouse/blob/master/src/Common/ColumnsHashingImpl.h
// and modified by Doris
#pragma once
#include "vec/columns/column.h"
#include "vec/columns/column_nullable.h"
#include "vec/common/assert_cast.h"
#include "vec/common/hash_table/ph_hash_map.h"
#include "vec/common/hash_table/ph_hash_set.h"
namespace doris::vectorized {
namespace ColumnsHashing {
namespace columns_hashing_impl {
template <typename Value, bool consecutive_keys_optimization_>
struct LastElementCache {
static constexpr bool consecutive_keys_optimization = consecutive_keys_optimization_;
Value value;
bool empty = true;
bool found = false;
bool check(const Value& value_) { return !empty && value == value_; }
template <typename Key>
bool check(const Key& key) {
return !empty && value.first == key;
}
};
template <typename Data>
struct LastElementCache<Data, false> {
static constexpr bool consecutive_keys_optimization = false;
};
template <typename Mapped>
class FindResultImpl {
Mapped* value = nullptr;
bool found;
public:
FindResultImpl(Mapped* value_, bool found_) : value(value_), found(found_) {}
bool is_found() const { return found; }
Mapped& get_mapped() const { return *value; }
};
template <>
class FindResultImpl<void> {
bool found;
public:
explicit FindResultImpl(bool found_) : found(found_) {}
bool is_found() const { return found; }
};
template <typename Derived, typename Value, typename Mapped, bool consecutive_keys_optimization>
class HashMethodBase {
public:
using FindResult = FindResultImpl<Mapped>;
static constexpr bool has_mapped = !std::is_same_v<Mapped, void>;
using Cache = LastElementCache<Value, consecutive_keys_optimization>;
template <typename Data, typename Func, typename CreatorForNull, typename KeyHolder>
ALWAYS_INLINE Mapped* lazy_emplace_key(Data& data, size_t row, KeyHolder&& key,
size_t hash_value, Func&& f,
CreatorForNull&& null_creator) {
return lazy_emplace_impl(std::forward<KeyHolder>(key), hash_value, data,
std::forward<Func>(f));
}
template <typename Data, typename Key>
ALWAYS_INLINE FindResult find_key_with_hash(Data& data, size_t i, Key key, size_t hash_value) {
return find_key_impl(key, hash_value, data);
}
protected:
Cache cache;
HashMethodBase() {
if constexpr (consecutive_keys_optimization) {
if constexpr (has_mapped) {
/// Init PairNoInit elements.
cache.value.second = Mapped();
cache.value.first = {};
} else
cache.value = Value();
}
}
template <typename Data, typename KeyHolder, typename Func>
ALWAYS_INLINE Mapped* lazy_emplace_impl(KeyHolder& key_holder, size_t hash_value, Data& data,
Func&& f) {
typename Data::LookupResult it;
data.lazy_emplace(key_holder, it, hash_value, std::forward<Func>(f));
return lookup_result_get_mapped(it);
}
template <typename Data, typename Key>
ALWAYS_INLINE FindResult find_key_impl(Key key, size_t hash_value, Data& data) {
if constexpr (Cache::consecutive_keys_optimization) {
if (cache.check(key)) {
if constexpr (has_mapped)
return FindResult(&cache.value.second, cache.found);
else
return FindResult(cache.found);
}
}
auto it = data.find(key, hash_value);
if constexpr (consecutive_keys_optimization) {
cache.found = it != nullptr;
cache.empty = false;
if constexpr (has_mapped) {
cache.value.first = key;
if (it) {
cache.value.second = *lookup_result_get_mapped(it);
}
} else {
cache.value = key;
}
}
if constexpr (has_mapped)
return FindResult(it ? lookup_result_get_mapped(it) : nullptr, it != nullptr);
else
return FindResult(it != nullptr);
}
};
} // namespace columns_hashing_impl
} // namespace ColumnsHashing
} // namespace doris::vectorized