blob: e4287b9ecff0cbd5b0f43b2664e04e26989292b9 [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
// This file is copied from
// https://github.com/ClickHouse/ClickHouse/blob/master/src/Common/MemcmpSmall.h
// and modified by Doris
#pragma once
#include <algorithm>
#include <cstdint>
namespace doris::vectorized::detail {
#include "common/compile_check_begin.h"
inline int cmp(uint8_t a, uint8_t b) {
if (a < b) return -1;
if (a > b) return 1;
return 0;
}
inline int cmp(size_t a, size_t b) {
if (a < b) return -1;
if (a > b) return 1;
return 0;
}
} // namespace doris::vectorized::detail
/// We can process uninitialized memory in the functions below.
/// Results don't depend on the values inside uninitialized memory but Memory Sanitizer cannot see it.
/// Disable optimized functions if compile with Memory Sanitizer.
#if (defined(__SSE2__) && !defined(__aarch64__)) && !defined(MEMORY_SANITIZER)
#include "util/sse_util.hpp"
/** All functions works under the following assumptions:
* - it's possible to read up to 15 excessive bytes after end of 'a' and 'b' region;
* - memory regions are relatively small and extra loop unrolling is not worth to do.
*/
inline int memcmp_small_allow_overflow15(const uint8_t* a, size_t a_size, const uint8_t* b,
size_t b_size) {
size_t min_size = std::min(a_size, b_size);
for (size_t offset = 0; offset < min_size; offset += 16) {
uint16_t mask = static_cast<uint16_t>(_mm_movemask_epi8(
_mm_cmpeq_epi8(_mm_loadu_si128(reinterpret_cast<const __m128i*>(a + offset)),
_mm_loadu_si128(reinterpret_cast<const __m128i*>(b + offset)))));
mask = ~mask;
if (mask) {
offset += __builtin_ctz(mask);
if (offset >= min_size) break;
return doris::vectorized::detail::cmp(a[offset], b[offset]);
}
}
return doris::vectorized::detail::cmp(a_size, b_size);
}
/** Variant when memory regions have same size.
* TODO Check if the compiler can optimize previous function when the caller pass identical sizes.
*/
inline int memcmp_small_allow_overflow15(const uint8_t* a, const uint8_t* b, size_t size) {
for (size_t offset = 0; offset < size; offset += 16) {
uint16_t mask = static_cast<uint16_t>(_mm_movemask_epi8(
_mm_cmpeq_epi8(_mm_loadu_si128(reinterpret_cast<const __m128i*>(a + offset)),
_mm_loadu_si128(reinterpret_cast<const __m128i*>(b + offset)))));
mask = ~mask;
if (mask) {
offset += __builtin_ctz(mask);
if (offset >= size) return 0;
return doris::vectorized::detail::cmp(a[offset], b[offset]);
}
}
return 0;
}
/** Compare memory regions for equality.
*/
inline bool memequal_small_allow_overflow15(const uint8_t* a, size_t a_size, const uint8_t* b,
size_t b_size) {
if (a_size != b_size) return false;
for (size_t offset = 0; offset < a_size; offset += 16) {
uint16_t mask = static_cast<uint16_t>(_mm_movemask_epi8(
_mm_cmpeq_epi8(_mm_loadu_si128(reinterpret_cast<const __m128i*>(a + offset)),
_mm_loadu_si128(reinterpret_cast<const __m128i*>(b + offset)))));
mask = ~mask;
if (mask) {
offset += __builtin_ctz(mask);
return offset >= a_size;
}
}
return true;
}
/** Variant when the caller know in advance that the size is a multiple of 16.
*/
inline int memcmp_small_multiple_of16(const uint8_t* a, const uint8_t* b, size_t size) {
for (size_t offset = 0; offset < size; offset += 16) {
uint16_t mask = static_cast<uint16_t>(_mm_movemask_epi8(
_mm_cmpeq_epi8(_mm_loadu_si128(reinterpret_cast<const __m128i*>(a + offset)),
_mm_loadu_si128(reinterpret_cast<const __m128i*>(b + offset)))));
mask = ~mask;
if (mask) {
offset += __builtin_ctz(mask);
return doris::vectorized::detail::cmp(a[offset], b[offset]);
}
}
return 0;
}
/** Variant when the size is 16 exactly.
*/
inline int memcmp16(const uint8_t* a, const uint8_t* b) {
uint16_t mask = static_cast<uint16_t>(_mm_movemask_epi8(
_mm_cmpeq_epi8(_mm_loadu_si128(reinterpret_cast<const __m128i*>(a)),
_mm_loadu_si128(reinterpret_cast<const __m128i*>(b)))));
mask = ~mask;
if (mask) {
auto offset = __builtin_ctz(mask);
return doris::vectorized::detail::cmp(a[offset], b[offset]);
}
return 0;
}
/** Variant when the size is 16 exactly.
*/
inline bool memequal16(const void* a, const void* b) {
return 0xFFFF ==
_mm_movemask_epi8(_mm_cmpeq_epi8(_mm_loadu_si128(reinterpret_cast<const __m128i*>(a)),
_mm_loadu_si128(reinterpret_cast<const __m128i*>(b))));
}
/** Compare memory region to zero */
inline bool memory_is_zero_small_allow_overflow15(const void* data, size_t size) {
const __m128i zero16 = _mm_setzero_si128();
for (size_t offset = 0; offset < size; offset += 16) {
uint16_t mask = static_cast<uint16_t>(_mm_movemask_epi8(
_mm_cmpeq_epi8(zero16, _mm_loadu_si128(reinterpret_cast<const __m128i*>(
reinterpret_cast<const char*>(data) + offset)))));
mask = ~mask;
if (mask) {
offset += __builtin_ctz(mask);
return offset >= size;
}
}
return true;
}
#else
#include <cstring>
inline int memcmp_small_allow_overflow15(const uint8_t* a, size_t a_size, const uint8_t* b,
size_t b_size) {
if (auto res = memcmp(a, b, std::min(a_size, b_size)))
return res;
else
return doris::vectorized::detail::cmp(a_size, b_size);
}
inline int memcmp_small_allow_overflow15(const uint8_t* a, const uint8_t* b, size_t size) {
return memcmp(a, b, size);
}
inline bool memequal_small_allow_overflow15(const uint8_t* a, size_t a_size, const uint8_t* b,
size_t b_size) {
return a_size == b_size && 0 == memcmp(a, b, a_size);
}
inline int memcmp_small_multiple_of16(const uint8_t* a, const uint8_t* b, size_t size) {
return memcmp(a, b, size);
}
inline int memcmp16(const uint8_t* a, const uint8_t* b) {
return memcmp(a, b, 16);
}
inline bool memequal16(const void* a, const void* b) {
return 0 == memcmp(a, b, 16);
}
inline bool memory_is_zero_small_allow_overflow15(const void* data, size_t size) {
const char* pos = reinterpret_cast<const char*>(data);
const char* end = pos + size;
for (; pos < end; ++pos)
if (*pos) return false;
return true;
}
#endif
#include "common/compile_check_end.h"