blob: e9795882b36bd012906a2f94a5cea81179d6293f [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
// the following code are modified from RocksDB:
// https://github.com/facebook/rocksdb/blob/master/util/crc32c_test.cc
#include <crc32c/crc32c.h>
#include <gtest/gtest-message.h>
#include <gtest/gtest-test-part.h>
#include <string.h>
#include <zlib.h>
#include <cstdint>
#include <limits>
#include <vector>
#include "gtest/gtest_pred_impl.h"
#include "util/hash_util.hpp"
#include "util/slice.h"
namespace doris {
TEST(CRC, StandardResults) {
// Original Fast_CRC32 tests.
// From rfc3720 section B.4.
char buf[32];
memset(buf, 0, sizeof(buf));
EXPECT_EQ(0x8a9136aaU, crc32c::Crc32c(buf, sizeof(buf)));
memset(buf, 0xff, sizeof(buf));
EXPECT_EQ(0x62a8ab43U, crc32c::Crc32c(buf, sizeof(buf)));
for (int i = 0; i < 32; i++) {
buf[i] = static_cast<char>(i);
}
EXPECT_EQ(0x46dd794eU, crc32c::Crc32c(buf, sizeof(buf)));
for (int i = 0; i < 32; i++) {
buf[i] = static_cast<char>(31 - i);
}
EXPECT_EQ(0x113fdb5cU, crc32c::Crc32c(buf, sizeof(buf)));
unsigned char data[48] = {
0x01, 0xc0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00,
0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x18, 0x28, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
};
EXPECT_EQ(0xd9963a56, crc32c::Crc32c(reinterpret_cast<char*>(data), sizeof(data)));
}
TEST(CRC, Values) {
EXPECT_NE(crc32c::Crc32c(std::string("a")), crc32c::Crc32c(std::string("foo")));
}
TEST(CRC, Extend) {
auto s1 = std::string("hello ");
auto s2 = std::string("world");
EXPECT_EQ(crc32c::Crc32c(std::string("hello world")),
crc32c::Extend(crc32c::Crc32c(s1), (const uint8_t*)s2.data(), s2.size()));
std::vector<std::string_view> slices = {s1, s2};
EXPECT_EQ(crc32c::Crc32c(std::string("hello world")),
crc32c::Extend(crc32c::Crc32c(slices[0]), (const uint8_t*)s2.data(), s2.size()));
}
} // namespace doris
namespace doris {
// Helper: compute crc32c via crc32c::Crc32c for a value of type T
template <typename T>
uint32_t crc32c_reference(const T& value, uint32_t seed) {
return crc32c::Extend(seed, reinterpret_cast<const uint8_t*>(&value), sizeof(T));
}
// Helper: compute zlib crc32 for a value of type T
template <typename T>
uint32_t zlib_crc32_reference(const T& value, uint32_t seed) {
return HashUtil::zlib_crc_hash(&value, sizeof(T), seed);
}
/*
todo: fix those cases when we have a new release version; do not consider the compatibility issue
use following code to replace the old crc32c_fixed function in hash_util.hpp
template <typename T>
static uint32_t crc32c_fixed(const T& value, uint32_t hash) {
uint32_t crc = hash ^ 0xFFFFFFFFU;
if constexpr (sizeof(T) == 1) {
crc = _mm_crc32_u8(crc, *reinterpret_cast<const uint8_t*>(&value));
} else if constexpr (sizeof(T) == 2) {
crc = _mm_crc32_u16(crc, *reinterpret_cast<const uint16_t*>(&value));
} else if constexpr (sizeof(T) == 4) {
crc = _mm_crc32_u32(crc, *reinterpret_cast<const uint32_t*>(&value));
} else if constexpr (sizeof(T) == 8) {
crc = (uint32_t)_mm_crc32_u64(crc, *reinterpret_cast<const uint64_t*>(&value));
} else {
return crc32c_extend(hash, (const uint8_t*)&value, sizeof(T));
}
return crc ^ 0xFFFFFFFFU;
}
// ==================== crc32c_fixed tests ====================
TEST(CRC32CFixed, Uint8Values) {
uint8_t values[] = {0, 1, 127, 128, 255};
for (uint32_t seed : {0U, 1U, 0xFFFFFFFFU, 0xDEADBEEFU}) {
for (auto v : values) {
EXPECT_EQ(HashUtil::crc32c_fixed(v, seed), crc32c_reference(v, seed))
<< "uint8_t v=" << (int)v << " seed=" << seed;
}
}
}
TEST(CRC32CFixed, Uint16Values) {
uint16_t values[] = {0, 1, 255, 256, 1000, 32767, 65535};
for (uint32_t seed : {0U, 1U, 0xFFFFFFFFU, 0x12345678U}) {
for (auto v : values) {
EXPECT_EQ(HashUtil::crc32c_fixed(v, seed), crc32c_reference(v, seed))
<< "uint16_t v=" << v << " seed=" << seed;
}
}
}
TEST(CRC32CFixed, Int32Values) {
int32_t values[] = {0,
1,
-1,
42,
-42,
1000000,
-1000000,
std::numeric_limits<int32_t>::min(),
std::numeric_limits<int32_t>::max()};
for (uint32_t seed : {0U, 0xFFFFFFFFU, 0xCAFEBABEU}) {
for (auto v : values) {
EXPECT_EQ(HashUtil::crc32c_fixed(v, seed), crc32c_reference(v, seed))
<< "int32_t v=" << v << " seed=" << seed;
}
}
}
TEST(CRC32CFixed, Uint32Values) {
uint32_t values[] = {0, 1, 0xFF, 0xFFFF, 0xFFFFFFFF, 0xDEADBEEF, 0x12345678};
for (uint32_t seed : {0U, 0xFFFFFFFFU, 0xABCD1234U}) {
for (auto v : values) {
EXPECT_EQ(HashUtil::crc32c_fixed(v, seed), crc32c_reference(v, seed))
<< "uint32_t v=" << v << " seed=" << seed;
}
}
}
TEST(CRC32CFixed, Int64Values) {
int64_t values[] = {0,
1,
-1,
1000000000LL,
-1000000000LL,
std::numeric_limits<int64_t>::min(),
std::numeric_limits<int64_t>::max(),
0x0102030405060708LL,
-0x0102030405060708LL};
for (uint32_t seed : {0U, 0xFFFFFFFFU, 0x87654321U}) {
for (auto v : values) {
EXPECT_EQ(HashUtil::crc32c_fixed(v, seed), crc32c_reference(v, seed))
<< "int64_t v=" << v << " seed=" << seed;
}
}
}
TEST(CRC32CFixed, Uint64Values) {
uint64_t values[] = {0,
1,
0xFFFFFFFFFFFFFFFFULL,
0xDEADBEEFCAFEBABEULL,
0x0123456789ABCDEFULL,
0xFF00FF00FF00FF00ULL};
for (uint32_t seed : {0U, 0xFFFFFFFFU, 0x11111111U}) {
for (auto v : values) {
EXPECT_EQ(HashUtil::crc32c_fixed(v, seed), crc32c_reference(v, seed))
<< "uint64_t v=" << v << " seed=" << seed;
}
}
}
TEST(CRC32CFixed, FloatValues) {
float values[] = {0.0f,
-0.0f,
1.0f,
-1.0f,
3.14f,
std::numeric_limits<float>::min(),
std::numeric_limits<float>::max(),
std::numeric_limits<float>::infinity()};
for (uint32_t seed : {0U, 0xFFFFFFFFU}) {
for (auto v : values) {
EXPECT_EQ(HashUtil::crc32c_fixed(v, seed), crc32c_reference(v, seed))
<< "float v=" << v << " seed=" << seed;
}
}
}
TEST(CRC32CFixed, DoubleValues) {
double values[] = {0.0,
-0.0,
1.0,
-1.0,
3.141592653589793,
1e100,
-1e100,
std::numeric_limits<double>::infinity()};
for (uint32_t seed : {0U, 0xFFFFFFFFU}) {
for (auto v : values) {
EXPECT_EQ(HashUtil::crc32c_fixed(v, seed), crc32c_reference(v, seed))
<< "double v=" << v << " seed=" << seed;
}
}
}
TEST(CRC32CFixed, NullHash) {
// crc32c_null should match crc32c_fixed with int(0)
for (uint32_t seed : {0U, 0xFFFFFFFFU, 0xDEADBEEFU}) {
int zero = 0;
EXPECT_EQ(HashUtil::crc32c_null(seed), HashUtil::crc32c_fixed(zero, seed));
EXPECT_EQ(HashUtil::crc32c_null(seed), crc32c_reference(zero, seed));
}
}
*/
// ==================== zlib_crc32_fixed tests ====================
TEST(ZlibCRC32Fixed, Uint8Values) {
uint8_t values[] = {0, 1, 42, 127, 128, 255};
for (uint32_t seed : {0U, 1U, 0xFFFFFFFFU, 0xDEADBEEFU}) {
for (auto v : values) {
EXPECT_EQ(HashUtil::zlib_crc32_fixed(v, seed), zlib_crc32_reference(v, seed))
<< "uint8_t v=" << (int)v << " seed=" << seed;
}
}
}
TEST(ZlibCRC32Fixed, Int16Values) {
int16_t values[] = {0, 1, -1, 256, -256, 32767, -32768};
for (uint32_t seed : {0U, 0xFFFFFFFFU, 0x12345678U}) {
for (auto v : values) {
EXPECT_EQ(HashUtil::zlib_crc32_fixed(v, seed), zlib_crc32_reference(v, seed))
<< "int16_t v=" << v << " seed=" << seed;
}
}
}
TEST(ZlibCRC32Fixed, Uint16Values) {
uint16_t values[] = {0, 1, 255, 256, 1000, 32767, 65535};
for (uint32_t seed : {0U, 0xFFFFFFFFU, 0xABCDEF00U}) {
for (auto v : values) {
EXPECT_EQ(HashUtil::zlib_crc32_fixed(v, seed), zlib_crc32_reference(v, seed))
<< "uint16_t v=" << v << " seed=" << seed;
}
}
}
TEST(ZlibCRC32Fixed, Int32Values) {
int32_t values[] = {0,
1,
-1,
42,
-42,
1000000,
-1000000,
std::numeric_limits<int32_t>::min(),
std::numeric_limits<int32_t>::max()};
for (uint32_t seed : {0U, 0xFFFFFFFFU, 0xCAFEBABEU}) {
for (auto v : values) {
EXPECT_EQ(HashUtil::zlib_crc32_fixed(v, seed), zlib_crc32_reference(v, seed))
<< "int32_t v=" << v << " seed=" << seed;
}
}
}
TEST(ZlibCRC32Fixed, Uint32Values) {
uint32_t values[] = {0, 1, 0xFF, 0xFFFF, 0xFFFFFFFF, 0xDEADBEEF, 0x12345678};
for (uint32_t seed : {0U, 0xFFFFFFFFU, 0xABCD1234U}) {
for (auto v : values) {
EXPECT_EQ(HashUtil::zlib_crc32_fixed(v, seed), zlib_crc32_reference(v, seed))
<< "uint32_t v=" << v << " seed=" << seed;
}
}
}
TEST(ZlibCRC32Fixed, Int64Values) {
int64_t values[] = {0,
1,
-1,
1000000000LL,
-1000000000LL,
std::numeric_limits<int64_t>::min(),
std::numeric_limits<int64_t>::max(),
0x0102030405060708LL,
-0x0102030405060708LL};
for (uint32_t seed : {0U, 0xFFFFFFFFU, 0x87654321U}) {
for (auto v : values) {
EXPECT_EQ(HashUtil::zlib_crc32_fixed(v, seed), zlib_crc32_reference(v, seed))
<< "int64_t v=" << v << " seed=" << seed;
}
}
}
TEST(ZlibCRC32Fixed, Uint64Values) {
uint64_t values[] = {0,
1,
0xFFFFFFFFFFFFFFFFULL,
0xDEADBEEFCAFEBABEULL,
0x0123456789ABCDEFULL,
0xFF00FF00FF00FF00ULL};
for (uint32_t seed : {0U, 0xFFFFFFFFU, 0x11111111U}) {
for (auto v : values) {
EXPECT_EQ(HashUtil::zlib_crc32_fixed(v, seed), zlib_crc32_reference(v, seed))
<< "uint64_t v=" << v << " seed=" << seed;
}
}
}
TEST(ZlibCRC32Fixed, FloatValues) {
float values[] = {0.0f,
-0.0f,
1.0f,
-1.0f,
3.14f,
1e10f,
-1e10f,
std::numeric_limits<float>::min(),
std::numeric_limits<float>::max(),
std::numeric_limits<float>::infinity()};
for (uint32_t seed : {0U, 0xFFFFFFFFU}) {
for (auto v : values) {
EXPECT_EQ(HashUtil::zlib_crc32_fixed(v, seed), zlib_crc32_reference(v, seed))
<< "float v=" << v << " seed=" << seed;
}
}
}
TEST(ZlibCRC32Fixed, DoubleValues) {
double values[] = {0.0,
-0.0,
1.0,
-1.0,
3.141592653589793,
1e100,
-1e100,
1e-300,
std::numeric_limits<double>::infinity()};
for (uint32_t seed : {0U, 0xFFFFFFFFU}) {
for (auto v : values) {
EXPECT_EQ(HashUtil::zlib_crc32_fixed(v, seed), zlib_crc32_reference(v, seed))
<< "double v=" << v << " seed=" << seed;
}
}
}
TEST(ZlibCRC32Fixed, NullHash) {
// zlib_crc_hash_null should match zlib_crc32_fixed with int(0)
for (uint32_t seed : {0U, 0xFFFFFFFFU, 0xDEADBEEFU}) {
int zero = 0;
EXPECT_EQ(HashUtil::zlib_crc_hash_null(seed), HashUtil::zlib_crc32_fixed(zero, seed));
EXPECT_EQ(HashUtil::zlib_crc_hash_null(seed), zlib_crc32_reference(zero, seed));
}
}
// ==================== Cross-validation: fixed vs non-fixed should differ ====================
TEST(CRC32Fixed, CRC32CVsZlibDiffer) {
// CRC32C and standard CRC32 use different polynomials, so results should differ
// (except possibly by coincidence on some values, but not systematically)
int32_t v = 12345678;
uint32_t seed = 0;
uint32_t crc32c_result = HashUtil::crc32c_fixed(v, seed);
uint32_t zlib_result = HashUtil::zlib_crc32_fixed(v, seed);
EXPECT_NE(crc32c_result, zlib_result)
<< "CRC32C and zlib CRC32 should produce different results for non-trivial input";
}
// ==================== Chaining: verify incremental hashing ====================
/*
TEST(CRC32CFixed, IncrementalChaining) {
// Hash two int32 values incrementally and compare with hashing 8 bytes at once
int32_t a = 0x11223344;
int32_t b = 0x55667788;
uint32_t seed = 0;
uint32_t chained = HashUtil::crc32c_fixed(a, seed);
chained = HashUtil::crc32c_fixed(b, chained);
// Reference: hash the 8 bytes sequentially via crc32c::Extend
uint8_t buf[8];
memcpy(buf, &a, 4);
memcpy(buf + 4, &b, 4);
uint32_t reference = crc32c::Extend(seed, buf, 8);
EXPECT_EQ(chained, reference);
}
*/
TEST(ZlibCRC32Fixed, IncrementalChaining) {
// Hash two int32 values incrementally and compare with hashing 8 bytes at once
int32_t a = 0x11223344;
int32_t b = 0x55667788;
uint32_t seed = 0;
uint32_t chained = HashUtil::zlib_crc32_fixed(a, seed);
chained = HashUtil::zlib_crc32_fixed(b, chained);
// Reference: hash the 8 bytes sequentially via zlib crc32
uint8_t buf[8];
memcpy(buf, &a, 4);
memcpy(buf + 4, &b, 4);
uint32_t reference = (uint32_t)crc32(seed, buf, 8);
EXPECT_EQ(chained, reference);
}
/*
// ==================== Exhaustive 1-byte test ====================
TEST(CRC32CFixed, AllByteValues) {
for (int i = 0; i <= 255; i++) {
uint8_t v = static_cast<uint8_t>(i);
uint32_t seed = 0x12345678U;
EXPECT_EQ(HashUtil::crc32c_fixed(v, seed), crc32c_reference(v, seed)) << "byte=" << i;
}
}
TEST(ZlibCRC32Fixed, AllByteValues) {
for (int i = 0; i <= 255; i++) {
uint8_t v = static_cast<uint8_t>(i);
uint32_t seed = 0x12345678U;
EXPECT_EQ(HashUtil::zlib_crc32_fixed(v, seed), zlib_crc32_reference(v, seed))
<< "byte=" << i;
}
}
// ==================== Sequential pattern ====================
TEST(CRC32CFixed, SequentialInt32) {
// Hash a sequence of increasing int32 values, verify each against reference
uint32_t seed = 0;
for (int32_t i = -500; i <= 500; i++) {
EXPECT_EQ(HashUtil::crc32c_fixed(i, seed), crc32c_reference(i, seed)) << "i=" << i;
}
}
TEST(ZlibCRC32Fixed, SequentialInt32) {
uint32_t seed = 0;
for (int32_t i = -500; i <= 500; i++) {
EXPECT_EQ(HashUtil::zlib_crc32_fixed(i, seed), zlib_crc32_reference(i, seed)) << "i=" << i;
}
}
*/
// ==================== Large 16-byte type fallback test ====================
TEST(ZlibCRC32Fixed, LargeTypeFallback) {
// __int128 is 16 bytes, should hit the fallback path to zlib crc32()
__int128 value = static_cast<__int128>(0x0102030405060708ULL) << 64 | 0x090A0B0C0D0E0F10ULL;
uint32_t seed = 0;
uint32_t fixed_result = HashUtil::zlib_crc32_fixed(value, seed);
uint32_t ref_result = HashUtil::zlib_crc_hash(&value, sizeof(value), seed);
EXPECT_EQ(fixed_result, ref_result);
}
} // namespace doris