blob: 1f36790bbba183ef226958ab390b7e07dec1d193 [file]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
// This file is copied from
// https://github.com/ClickHouse/ClickHouse/blob/master/src/Common/HashTable/Hash.h
// and modified by Doris
#pragma once
#include <type_traits>
#include "parallel_hashmap/phmap_utils.h"
#include "vec/common/string_ref.h"
#include "vec/common/uint128.h"
#include "vec/core/types.h"
/** Hash functions that are better than the trivial function std::hash.
*
* Example: when we do aggregation by the visitor ID, the performance increase is more than 5 times.
* This is because of following reasons:
* - in Yandex, visitor identifier is an integer that has timestamp with seconds resolution in lower bits;
* - in typical implementation of standard library, hash function for integers is trivial and just use lower bits;
* - traffic is non-uniformly distributed across a day;
* - we are using open-addressing linear probing hash tables that are most critical to hash function quality,
* and trivial hash function gives disastrous results.
*/
/** Taken from MurmurHash. This is Murmur finalizer.
* Faster than int_hash32 when inserting into the hash table UInt64 -> UInt64, where the key is the visitor ID.
*/
inline doris::vectorized::UInt64 int_hash64(doris::vectorized::UInt64 x) {
x ^= x >> 33;
x *= 0xff51afd7ed558ccdULL;
x ^= x >> 33;
x *= 0xc4ceb9fe1a85ec53ULL;
x ^= x >> 33;
return x;
}
/** CRC32C is not very high-quality as a hash function,
* according to avalanche and bit independence tests (see SMHasher software), as well as a small number of bits,
* but can behave well when used in hash tables,
* due to high speed (latency 3 + 1 clock cycle, throughput 1 clock cycle).
* Works only with SSE 4.2 support.
*/
#include "util/sse_util.hpp"
inline doris::vectorized::UInt64 int_hash_crc32(doris::vectorized::UInt64 x) {
#if defined(__SSE4_2__) || (defined(__aarch64__) && defined(__ARM_FEATURE_CRC32))
return _mm_crc32_u64(-1ULL, x);
#else
/// On other platforms we do not have CRC32. NOTE This can be confusing.
return int_hash64(x);
#endif
}
template <typename T>
inline size_t default_hash64(T key) {
union {
T in;
doris::vectorized::UInt64 out;
} u;
u.out = 0;
u.in = key;
return int_hash64(u.out);
}
template <typename T, typename Enable = void>
struct DefaultHash;
template <typename T>
requires std::is_arithmetic_v<T>
struct DefaultHash<T> {
size_t operator()(T key) const { return default_hash64<T>(key); }
};
template <>
struct DefaultHash<doris::vectorized::Int128I> {
size_t operator()(doris::vectorized::Int128I key) const {
return default_hash64<doris::vectorized::Int128I>(key);
}
};
template <>
struct DefaultHash<doris::StringRef> : public doris::StringRefHash {};
template <typename T>
struct HashCRC32;
template <typename T>
inline size_t hash_crc32(T key) {
union {
T in;
doris::vectorized::UInt64 out;
} u;
u.out = 0;
u.in = key;
return int_hash_crc32(u.out);
}
template <>
inline size_t hash_crc32(doris::vectorized::UInt128 u) {
return doris::vectorized::UInt128HashCRC32()(u);
}
template <>
inline size_t hash_crc32(doris::vectorized::Int128 u) {
return doris::vectorized::UInt128HashCRC32()(
doris::vectorized::UInt128((u >> 64) & int64_t(-1), u & int64_t(-1)));
}
#define DEFINE_HASH(T) \
template <> \
struct HashCRC32<T> { \
size_t operator()(T key) const { \
return hash_crc32<T>(key); \
} \
};
DEFINE_HASH(doris::vectorized::UInt8)
DEFINE_HASH(doris::vectorized::UInt16)
DEFINE_HASH(doris::vectorized::UInt32)
DEFINE_HASH(doris::vectorized::UInt64)
DEFINE_HASH(doris::vectorized::UInt128)
DEFINE_HASH(doris::vectorized::Int8)
DEFINE_HASH(doris::vectorized::Int16)
DEFINE_HASH(doris::vectorized::Int32)
DEFINE_HASH(doris::vectorized::Int64)
DEFINE_HASH(doris::vectorized::Int128)
DEFINE_HASH(doris::vectorized::Float32)
DEFINE_HASH(doris::vectorized::Float64)
#undef DEFINE_HASH
template <typename Key, typename Hash = HashCRC32<Key>>
struct HashMixWrapper {
size_t operator()(Key key) const { return phmap::phmap_mix<sizeof(size_t)>()(Hash()(key)); }
};
template <>
struct HashCRC32<doris::vectorized::UInt256> {
size_t operator()(const doris::vectorized::UInt256& x) const {
#if defined(__SSE4_2__) || defined(__aarch64__)
doris::vectorized::UInt64 crc = -1ULL;
crc = _mm_crc32_u64(crc, x.a);
crc = _mm_crc32_u64(crc, x.b);
crc = _mm_crc32_u64(crc, x.c);
crc = _mm_crc32_u64(crc, x.d);
return crc;
#else
return Hash128to64({Hash128to64({x.a, x.b}), Hash128to64({x.c, x.d})});
#endif
}
};
/// It is reasonable to use for UInt8, UInt16 with sufficient hash table size.
struct TrivialHash {
template <typename T>
size_t operator()(T key) const {
return key;
}
};
/** A relatively good non-cryptographic hash function from UInt64 to UInt32.
* But worse (both in quality and speed) than just cutting int_hash64.
* Taken from here: http://www.concentric.net/~ttwang/tech/inthash.htm
*
* Slightly changed compared to the function by link: shifts to the right are accidentally replaced by a cyclic shift to the right.
* This change did not affect the smhasher test results.
*
* It is recommended to use different salt for different tasks.
* That was the case that in the database values were sorted by hash (for low-quality pseudo-random spread),
* and in another place, in the aggregate function, the same hash was used in the hash table,
* as a result, this aggregate function was monstrously slowed due to collisions.
*
* NOTE Salting is far from perfect, because it commutes with first steps of calculation.
*
* NOTE As mentioned, this function is slower than int_hash64.
* But occasionally, it is faster, when written in a loop and loop is vectorized.
*/
template <doris::vectorized::UInt64 salt>
inline doris::vectorized::UInt32 int_hash32(doris::vectorized::UInt64 key) {
key ^= salt;
key = (~key) + (key << 18);
key = key ^ ((key >> 31) | (key << 33));
key = key * 21;
key = key ^ ((key >> 11) | (key << 53));
key = key + (key << 6);
key = key ^ ((key >> 22) | (key << 42));
return key;
}
/// For containers.
template <typename T, doris::vectorized::UInt64 salt = 0>
struct IntHash32 {
size_t operator()(const T& key) const { return int_hash32<salt>(key); }
};