blob: a19436e3f44a9c5d7fb9e95f491515260b4f79db [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
//-----------------------------------------------------------------------------
// MurmurHash3 was written by Austin Appleby, and is placed in the public
// domain. The author hereby disclaims copyright to this source code.
// Note - The x86 and x64 versions do _not_ produce the same results, as the
// algorithms are optimized for their respective platforms. You can still
// compile and run any of them on any platform, but your performance with the
// non-native version will be less than optimal.
#include "parquet/murmur3.h"
namespace parquet {
#if defined(_MSC_VER)
#define FORCE_INLINE __forceinline
#define ROTL64(x, y) _rotl64(x, y)
#else // defined(_MSC_VER)
#define FORCE_INLINE inline __attribute__((always_inline))
inline uint64_t rotl64(uint64_t x, int8_t r) { return (x << r) | (x >> (64 - r)); }
#define ROTL64(x, y) rotl64(x, y)
#endif // !defined(_MSC_VER)
#define BIG_CONSTANT(x) (x##LLU)
//-----------------------------------------------------------------------------
// Block read - if your platform needs to do endian-swapping or can only
// handle aligned reads, do the conversion here
FORCE_INLINE uint32_t getblock32(const uint32_t* p, int i) { return p[i]; }
FORCE_INLINE uint64_t getblock64(const uint64_t* p, int i) { return p[i]; }
//-----------------------------------------------------------------------------
// Finalization mix - force all bits of a hash block to avalanche
FORCE_INLINE uint32_t fmix32(uint32_t h) {
h ^= h >> 16;
h *= 0x85ebca6b;
h ^= h >> 13;
h *= 0xc2b2ae35;
h ^= h >> 16;
return h;
}
//----------
FORCE_INLINE uint64_t fmix64(uint64_t k) {
k ^= k >> 33;
k *= BIG_CONSTANT(0xff51afd7ed558ccd);
k ^= k >> 33;
k *= BIG_CONSTANT(0xc4ceb9fe1a85ec53);
k ^= k >> 33;
return k;
}
//-----------------------------------------------------------------------------
void Hash_x64_128(const void* key, const int len, const uint32_t seed, uint64_t out[2]) {
const uint8_t* data = (const uint8_t*)key;
const int nblocks = len / 16;
uint64_t h1 = seed;
uint64_t h2 = seed;
const uint64_t c1 = BIG_CONSTANT(0x87c37b91114253d5);
const uint64_t c2 = BIG_CONSTANT(0x4cf5ad432745937f);
//----------
// body
const uint64_t* blocks = (const uint64_t*)(data);
for (int i = 0; i < nblocks; i++) {
uint64_t k1 = getblock64(blocks, i * 2 + 0);
uint64_t k2 = getblock64(blocks, i * 2 + 1);
k1 *= c1;
k1 = ROTL64(k1, 31);
k1 *= c2;
h1 ^= k1;
h1 = ROTL64(h1, 27);
h1 += h2;
h1 = h1 * 5 + 0x52dce729;
k2 *= c2;
k2 = ROTL64(k2, 33);
k2 *= c1;
h2 ^= k2;
h2 = ROTL64(h2, 31);
h2 += h1;
h2 = h2 * 5 + 0x38495ab5;
}
//----------
// tail
const uint8_t* tail = (const uint8_t*)(data + nblocks * 16);
uint64_t k1 = 0;
uint64_t k2 = 0;
switch (len & 15) {
case 15:
k2 ^= ((uint64_t)tail[14]) << 48;
case 14:
k2 ^= ((uint64_t)tail[13]) << 40;
case 13:
k2 ^= ((uint64_t)tail[12]) << 32;
case 12:
k2 ^= ((uint64_t)tail[11]) << 24;
case 11:
k2 ^= ((uint64_t)tail[10]) << 16;
case 10:
k2 ^= ((uint64_t)tail[9]) << 8;
case 9:
k2 ^= ((uint64_t)tail[8]) << 0;
k2 *= c2;
k2 = ROTL64(k2, 33);
k2 *= c1;
h2 ^= k2;
case 8:
k1 ^= ((uint64_t)tail[7]) << 56;
case 7:
k1 ^= ((uint64_t)tail[6]) << 48;
case 6:
k1 ^= ((uint64_t)tail[5]) << 40;
case 5:
k1 ^= ((uint64_t)tail[4]) << 32;
case 4:
k1 ^= ((uint64_t)tail[3]) << 24;
case 3:
k1 ^= ((uint64_t)tail[2]) << 16;
case 2:
k1 ^= ((uint64_t)tail[1]) << 8;
case 1:
k1 ^= ((uint64_t)tail[0]) << 0;
k1 *= c1;
k1 = ROTL64(k1, 31);
k1 *= c2;
h1 ^= k1;
}
//----------
// finalization
h1 ^= len;
h2 ^= len;
h1 += h2;
h2 += h1;
h1 = fmix64(h1);
h2 = fmix64(h2);
h1 += h2;
h2 += h1;
reinterpret_cast<uint64_t*>(out)[0] = h1;
reinterpret_cast<uint64_t*>(out)[1] = h2;
}
template <typename T>
uint64_t HashHelper(T value, uint32_t seed) {
uint64_t output[2];
Hash_x64_128(reinterpret_cast<void*>(&value), sizeof(T), seed, output);
return output[0];
}
uint64_t MurmurHash3::Hash(int32_t value) const { return HashHelper(value, seed_); }
uint64_t MurmurHash3::Hash(int64_t value) const { return HashHelper(value, seed_); }
uint64_t MurmurHash3::Hash(float value) const { return HashHelper(value, seed_); }
uint64_t MurmurHash3::Hash(double value) const { return HashHelper(value, seed_); }
uint64_t MurmurHash3::Hash(const FLBA* value, uint32_t len) const {
uint64_t out[2];
Hash_x64_128(reinterpret_cast<const void*>(value->ptr), len, seed_, out);
return out[0];
}
uint64_t MurmurHash3::Hash(const Int96* value) const {
uint64_t out[2];
Hash_x64_128(reinterpret_cast<const void*>(value->value), sizeof(value->value), seed_,
out);
return out[0];
}
uint64_t MurmurHash3::Hash(const ByteArray* value) const {
uint64_t out[2];
Hash_x64_128(reinterpret_cast<const void*>(value->ptr), value->len, seed_, out);
return out[0];
}
} // namespace parquet