blob: b904728f59d74e1b81f8794b8fc8cdb85c37486b [file]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
#pragma once
#include <algorithm>
#include <cstdint>
#include <cstring>
#include <memory>
#include <type_traits>
#include <vector>
#include "fmt/format.h"
#include "paimon/common/io/memory_segment_output_stream.h"
#include "paimon/common/memory/memory_segment.h"
#include "paimon/io/byte_order.h"
#include "paimon/memory/bytes.h"
#include "paimon/memory/memory_pool.h"
#include "paimon/status.h"
#include "paimon/type_fwd.h"
#include "paimon/visibility.h"
namespace paimon {
/// Static helper functions for copying, reading, writing, comparing, searching and hashing
/// data that is stored in one or more MemorySegment objects.
///
/// The default constructor and the destructor are deleted, so the class is only usable
/// through its static member functions.
class PAIMON_EXPORT MemorySegmentUtils {
public:
MemorySegmentUtils() = delete;
~MemorySegmentUtils() = delete;
/// Allocate bytes in pool.
///
/// @param length presumably the number of bytes to allocate (defined outside this header).
/// @param pool the memory pool to allocate from.
static std::shared_ptr<Bytes> AllocateBytes(int32_t length, MemoryPool* pool);
/// Copy bytes from a source byte[] into the target segments.
///
/// @param segments target segments.
/// @param offset target segments offset.
/// @param bytes source byte[].
/// @param bytes_offset source byte[] offset.
/// @param num_bytes the number of bytes to copy.
static void CopyFromBytes(std::vector<MemorySegment>* segments, int32_t offset,
const Bytes& bytes, int32_t bytes_offset, int32_t num_bytes);
/// Copy segments to a new byte[].
///
/// @param segments Source segments.
/// @param offset Source segments offset.
/// @param num_bytes the number of bytes to copy.
/// @param pool memory pool; presumably used to allocate the returned byte[] (defined
/// outside this header).
static PAIMON_UNIQUE_PTR<Bytes> CopyToBytes(const std::vector<MemorySegment>& segments,
int32_t offset, int32_t num_bytes,
MemoryPool* pool);
/// Copy segments to target byte[].
///
/// Reads directly from the first segment when InFirstSegment() holds for the requested
/// range, and otherwise delegates to CopyMultiSegmentsToBytes().
///
/// @param segments Source segments.
/// @param offset Source segments offset.
/// @param bytes target byte[].
/// @param bytes_offset target byte[] offset.
/// @param num_bytes the number of bytes to copy.
template <typename T>
static void CopyToBytes(const std::vector<MemorySegment>& segments, int32_t offset, T* bytes,
int32_t bytes_offset, int32_t num_bytes);
/// Copy bytes of segments to output stream.
///
/// @note It just copies the data in, not including the length.
///
/// @param segments source segments
/// @param offset offset for segments
/// @param size_in_bytes size in bytes
/// @param target target output stream
/// @return Status::OK() once size_in_bytes bytes have been written; Status::Invalid if the
/// segments are exhausted while bytes remain to be copied.
static Status CopyToStream(const std::vector<MemorySegment>& segments, int32_t offset,
int32_t size_in_bytes, MemorySegmentOutputStream* target);
/// Copy segments to target unsafe pointer.
///
/// @param segments Source segments.
/// @param offset The position where the bytes are started to be read from these memory
/// segments.
/// @param target The unsafe memory to copy the bytes to.
/// @param num_bytes the number of bytes to copy.
static void CopyToUnsafe(const std::vector<MemorySegment>& segments, int32_t offset,
void* target, int32_t num_bytes);
/// Copy a range that may span several segments to target byte[].
///
/// Segments are visited in order; segments lying entirely before `offset` are skipped.
/// The function returns without reporting an error if the segments are exhausted before
/// `num_bytes` bytes have been copied.
///
/// @param segments Source segments.
/// @param offset Source segments offset.
/// @param bytes target byte[].
/// @param bytes_offset target byte[] offset.
/// @param num_bytes the number of bytes to copy.
template <typename T>
static void CopyMultiSegmentsToBytes(const std::vector<MemorySegment>& segments, int32_t offset,
T* bytes, int32_t bytes_offset, int32_t num_bytes);
/// Get the bytes of a region of the segments.
///
/// NOTE(review): defined outside this header; whether the result aliases the segment
/// memory or is a copy cannot be told from here.
///
/// @param segments Source segments.
/// @param base_offset Source segments offset.
/// @param size_in_bytes size of the region in bytes.
/// @param pool memory pool.
static std::shared_ptr<Bytes> GetBytes(const std::vector<MemorySegment>& segments,
int32_t base_offset, int32_t size_in_bytes,
MemoryPool* pool);
/// Whether the requested range lies just in the first MemorySegment, in which case callers
/// can use a quick single-segment path.
static bool InFirstSegment(const std::vector<MemorySegment>& segments, int32_t offset,
int32_t num_bytes);
/// Unset bit.
///
/// @param segment target segment.
/// @param base_offset bits base offset.
/// @param index bit index from base offset.
static void BitUnSet(MemorySegment* segment, int32_t base_offset, int32_t index);
/// Set bit.
///
/// @param segment target segment.
/// @param base_offset bits base offset.
/// @param index bit index from base offset.
static void BitSet(MemorySegment* segment, int32_t base_offset, int32_t index);
/// Read bit.
///
/// @param segment target segment.
/// @param base_offset bits base offset.
/// @param index bit index from base offset.
static bool BitGet(const MemorySegment& segment, int32_t base_offset, int32_t index);
/// Set bit from segments.
///
/// @param segments target segments.
/// @param base_offset bits base offset.
/// @param index bit index from base offset.
static void BitSet(std::vector<MemorySegment>* segments, int32_t base_offset, int32_t index);
/// Read bit from segments.
///
/// @param segments target segments.
/// @param base_offset bits base offset.
/// @param index bit index from base offset.
static bool BitGet(const std::vector<MemorySegment>& segments, int32_t base_offset,
int32_t index);
/// Unset bit from segments.
///
/// @param segments target segments.
/// @param base_offset bits base offset.
/// @param index bit index from base offset.
static void BitUnSet(std::vector<MemorySegment>* segments, int32_t base_offset, int32_t index);
/// Get value from segments. Only supports: bool, char, int16_t, int32_t, int64_t, double,
/// float. T must be trivially copyable (checked by a static_assert in the definition).
///
/// @param segments target segments.
/// @param offset value offset.
/// @return the value read at `offset`.
template <typename T>
static T GetValue(const std::vector<MemorySegment>& segments, int32_t offset);
/// Set value to segments. Only supports: bool, char, int16_t, int32_t, int64_t, double,
/// float. T must be trivially copyable (checked by a static_assert in the definition).
///
/// @param segments target segments.
/// @param offset value offset.
/// @param value the value to write at `offset`.
template <typename T>
static void SetValue(std::vector<MemorySegment>* segments, int32_t offset, const T& value);
/// Compare two memory segments regions for equality.
///
/// @param segments1 Segments 1
/// @param offset1 Offset of segments1 to start comparing
/// @param segments2 Segments 2
/// @param offset2 Offset of segments2 to start comparing
/// @param len Length of the compared memory region
/// @return true if equal, false otherwise
static bool Equals(const std::vector<MemorySegment>& segments1, int32_t offset1,
const std::vector<MemorySegment>& segments2, int32_t offset2, int32_t len);
/// Multi-segment counterpart of Equals(); takes the same parameters.
///
/// NOTE(review): defined outside this header; presumably the path used when a region is not
/// contained in a single segment - confirm against the implementation.
static bool EqualsMultiSegments(const std::vector<MemorySegment>& segments1, int32_t offset1,
const std::vector<MemorySegment>& segments2, int32_t offset2,
int32_t len);
/// Find equal segments2 in segments1.
///
/// @param segments1 segs to search in.
/// @param offset1 offset of segments1.
/// @param num_bytes1 number of bytes of segments1 to consider.
/// @param segments2 sub segs to search for.
/// @param offset2 offset of segments2.
/// @param num_bytes2 number of bytes of segments2 to consider.
/// @return the found offset, or -1 if not found.
static int32_t Find(const std::vector<MemorySegment>& segments1, int32_t offset1,
int32_t num_bytes1, const std::vector<MemorySegment>& segments2,
int32_t offset2, int32_t num_bytes2);
/// Hash segments to int, num_bytes must be aligned to 4 bytes.
///
/// @param segments Source segments.
/// @param offset Source segments offset.
/// @param num_bytes the number of bytes to hash.
/// @param pool memory pool (its use is defined outside this header).
static int32_t HashByWords(const std::vector<MemorySegment>& segments, int32_t offset,
int32_t num_bytes, MemoryPool* pool);
/// Hash segments to int.
///
/// @param segments Source segments.
/// @param offset Source segments offset.
/// @param num_bytes the number of bytes to hash.
/// @param pool memory pool (its use is defined outside this header).
static int32_t Hash(const std::vector<MemorySegment>& segments, int32_t offset,
int32_t num_bytes, MemoryPool* pool);
private:
// NOTE(review): none of these constants is used inside this header. Presumably
// `bit_index >> ADDRESS_BITS_PER_WORD` yields the byte index and
// `bit_index & BIT_BYTE_INDEX_MASK` the bit within that byte - confirm in the .cpp file.
static constexpr int32_t ADDRESS_BITS_PER_WORD = 3;
static constexpr int32_t BIT_BYTE_INDEX_MASK = 7;
// 64 * 1024 and 32 * 1024 respectively; their use is not visible in this header.
static constexpr int32_t MAX_BYTES_LENGTH = 1024 * 64;
static constexpr int32_t MAX_CHARS_LENGTH = 1024 * 32;
private:
/// Write `value` at `offset` when the first-segment fast path does not apply. The target
/// segment is located using the size of segments[0] as the size of every segment; a value
/// crossing a segment boundary is handed to SetValueSlowly().
template <typename T>
static void SetValueToMultiSegments(std::vector<MemorySegment>* segments, int32_t offset,
const T& value);
/// Write `value` one byte at a time in the system byte order, starting at `seg_offset` of
/// segment `seg_num` and moving to the next segment when `seg_size` is reached.
template <typename T>
static void SetValueSlowly(std::vector<MemorySegment>* segments, int32_t seg_size,
int32_t seg_num, int32_t seg_offset, const T& value);
/// Read a value at `offset` when the first-segment fast path does not apply. The source
/// segment is located using the size of segments[0] as the size of every segment; a value
/// crossing a segment boundary is read by GetValueSlowly().
template <typename T>
static T GetValueFromMultiSegments(const std::vector<MemorySegment>& segments, int32_t offset);
/// Read a value one byte at a time in the system byte order, starting at `seg_offset` of
/// segment `seg_num` and moving to the next segment when `seg_size` is reached.
template <typename T>
static T GetValueSlowly(const std::vector<MemorySegment>& segments, int32_t seg_size,
int32_t seg_num, int32_t seg_offset);
/// Multi-segment counterpart of CopyFromBytes(); takes the same parameters.
static void CopyMultiSegmentsFromBytes(std::vector<MemorySegment>* segments, int32_t offset,
const Bytes& bytes, int32_t bytes_offset,
int32_t num_bytes);
/// Multi-segment counterpart of CopyToUnsafe(); takes the same parameters.
static void CopyMultiSegmentsToUnsafe(const std::vector<MemorySegment>& segments,
int32_t offset, void* target, int32_t num_bytes);
/// Multi-segment counterpart of Find(); takes the same parameters.
static int32_t FindInMultiSegments(const std::vector<MemorySegment>& segments1, int32_t offset1,
int32_t num_bytes1,
const std::vector<MemorySegment>& segments2, int32_t offset2,
int32_t num_bytes2);
/// Multi-segment counterpart of Hash(); takes the same parameters.
static int32_t HashMultiSeg(const std::vector<MemorySegment>& segments, int32_t offset,
int32_t num_bytes, MemoryPool* pool);
/// Multi-segment counterpart of HashByWords(); takes the same parameters.
static int32_t HashMultiSegByWords(const std::vector<MemorySegment>& segments, int32_t offset,
int32_t num_bytes, MemoryPool* pool);
/// Given a bit index, return the byte index containing it.
///
/// @param bit_index the bit index.
/// @return the byte index.
static int32_t ByteIndex(int32_t bit_index);
/// Multi-segment counterpart of BitSet(); takes the same parameters.
static void BitSetMultiSegments(std::vector<MemorySegment>* segments, int32_t base_offset,
int32_t index);
/// Multi-segment counterpart of BitUnSet(); takes the same parameters.
static void BitUnSetMultiSegments(std::vector<MemorySegment>* segments, int32_t base_offset,
int32_t index);
};
/// Copy `num_bytes` bytes, starting at `offset` of `segments`, into `bytes` beginning at
/// `bytes_offset`.
///
/// When InFirstSegment() holds for the requested range, the bytes are read straight from
/// the first segment; every other case is handled by CopyMultiSegmentsToBytes().
template <typename T>
inline void MemorySegmentUtils::CopyToBytes(const std::vector<MemorySegment>& segments,
                                            int32_t offset, T* bytes, int32_t bytes_offset,
                                            int32_t num_bytes) {
    if (!InFirstSegment(segments, offset, num_bytes)) {
        // Slow path: the range is not confined to the first segment.
        CopyMultiSegmentsToBytes(segments, offset, bytes, bytes_offset, num_bytes);
        return;
    }
    // Fast path: a single read from the first segment.
    segments[0].Get(offset, bytes, bytes_offset, num_bytes);
}
/// Copy `num_bytes` bytes, starting at logical position `offset` within `segments`, into
/// `bytes` beginning at `bytes_offset`. The range may span any number of segments.
///
/// Segments are visited in order. Segments that end at or before `offset` are skipped;
/// from the first segment that contains `offset` onwards, data is copied until
/// `num_bytes` bytes were transferred. If the segments run out first, the function
/// returns without signalling an error.
template <typename T>
inline void MemorySegmentUtils::CopyMultiSegmentsToBytes(const std::vector<MemorySegment>& segments,
                                                         int32_t offset, T* bytes,
                                                         int32_t bytes_offset, int32_t num_bytes) {
    int32_t pending = num_bytes;
    for (const auto& current : segments) {
        const int32_t available = current.Size() - offset;
        if (available <= 0) {
            // `offset` lies at or past the end of this segment: make it relative to the
            // next segment (offset - segment size == -available) and move on.
            offset = -available;
            continue;
        }
        const int32_t chunk = std::min(available, pending);
        // (num_bytes - pending) is the number of bytes already placed into `bytes`.
        current.Get(offset, bytes, num_bytes - pending + bytes_offset, chunk);
        pending -= chunk;
        // Every following segment is read from its beginning.
        offset = 0;
        if (pending == 0) {
            return;
        }
    }
}
/// Read a value of type T located at `offset` within `segments`.
///
/// T must be trivially copyable. If InFirstSegment() holds for the sizeof(T) bytes at
/// `offset`, the value is read from the first segment; otherwise the read is delegated
/// to GetValueFromMultiSegments().
template <typename T>
inline T MemorySegmentUtils::GetValue(const std::vector<MemorySegment>& segments, int32_t offset) {
    static_assert(std::is_trivially_copyable_v<T>, "T must be trivially copyable");
    if (!InFirstSegment(segments, offset, static_cast<int32_t>(sizeof(T)))) {
        return GetValueFromMultiSegments<T>(segments, offset);
    }
    return segments[0].GetValue<T>(offset);
}
/// Write `value` of type T at `offset` within `segments`.
///
/// T must be trivially copyable. If InFirstSegment() holds for the sizeof(T) bytes at
/// `offset`, the value is written into the first segment; otherwise the write is
/// delegated to SetValueToMultiSegments().
template <typename T>
inline void MemorySegmentUtils::SetValue(std::vector<MemorySegment>* segments, int32_t offset,
                                         const T& value) {
    static_assert(std::is_trivially_copyable_v<T>, "T must be trivially copyable");
    if (!InFirstSegment(*segments, offset, static_cast<int32_t>(sizeof(T)))) {
        SetValueToMultiSegments<T>(segments, offset, value);
        return;
    }
    (*segments)[0].PutValue<T>(offset, value);
}
template <typename T>
inline void MemorySegmentUtils::SetValueToMultiSegments(std::vector<MemorySegment>* segments,
int32_t offset, const T& value) {
int32_t seg_size = (*segments)[0].Size();
int32_t seg_index = offset / seg_size;
int32_t seg_offset = offset - seg_index * seg_size; // equal to %
if (seg_offset <= seg_size - static_cast<int32_t>(sizeof(T))) {
(*segments)[seg_index].PutValue<T>(seg_offset, value);
} else {
SetValueSlowly<T>(segments, seg_size, seg_index, seg_offset, value);
}
}
/// float specialization: writes `value` at logical position `offset` of `segments`.
///
/// The size of the first segment is used as the size of every segment. A value crossing
/// the segment end cannot be shifted as a float, so its bytes are copied into an int32_t
/// and written through SetValueSlowly<int32_t>().
template <>
inline void MemorySegmentUtils::SetValueToMultiSegments(std::vector<MemorySegment>* segments,
                                                        int32_t offset, const float& value) {
    constexpr int32_t kWidth = static_cast<int32_t>(sizeof(float));
    const int32_t segment_size = (*segments)[0].Size();
    const int32_t index = offset / segment_size;
    // Same result as offset % segment_size.
    const int32_t pos_in_segment = offset - index * segment_size;
    if (pos_in_segment > segment_size - kWidth) {
        int32_t raw_bits;
        std::memcpy(&raw_bits, &value, sizeof(float));
        SetValueSlowly<int32_t>(segments, segment_size, index, pos_in_segment, raw_bits);
        return;
    }
    (*segments)[index].PutValue<float>(pos_in_segment, value);
}
/// double specialization: writes `value` at logical position `offset` of `segments`.
///
/// The size of the first segment is used as the size of every segment. A value crossing
/// the segment end cannot be shifted as a double, so its bytes are copied into an int64_t
/// and written through SetValueSlowly<int64_t>().
template <>
inline void MemorySegmentUtils::SetValueToMultiSegments(std::vector<MemorySegment>* segments,
                                                        int32_t offset, const double& value) {
    constexpr int32_t kWidth = static_cast<int32_t>(sizeof(double));
    const int32_t segment_size = (*segments)[0].Size();
    const int32_t index = offset / segment_size;
    // Same result as offset % segment_size.
    const int32_t pos_in_segment = offset - index * segment_size;
    if (pos_in_segment > segment_size - kWidth) {
        int64_t raw_bits;
        std::memcpy(&raw_bits, &value, sizeof(double));
        SetValueSlowly<int64_t>(segments, segment_size, index, pos_in_segment, raw_bits);
        return;
    }
    (*segments)[index].PutValue<double>(pos_in_segment, value);
}
/// Write `value` one byte at a time, starting at `seg_offset` inside segment `seg_num`
/// and continuing in the following segment(s) whenever the end of a segment is reached.
///
/// Bytes are emitted in the system byte order (least significant byte first on a
/// little-endian system, most significant byte first otherwise), mirroring the order in
/// which GetValueSlowly() reassembles them.
///
/// @param segments target segments.
/// @param seg_size size of each segment; reaching it moves the write to the next segment.
/// @param seg_num index of the segment in which the write starts.
/// @param seg_offset offset inside that segment at which the write starts.
/// @param value the value to write; T is bool or an integral type (the float/double
/// callers in this file pass the value's bits as int32_t/int64_t).
template <typename T>
inline void MemorySegmentUtils::SetValueSlowly(std::vector<MemorySegment>* segments,
                                               int32_t seg_size, int32_t seg_num,
                                               int32_t seg_offset, const T& value) {
    // Write through a pointer to the vector's own element rather than making a by-value
    // MemorySegment copy for every segment that is visited.
    MemorySegment* segment = &(*segments)[seg_num];
    const auto move_to_next_segment_if_at_end = [&]() {
        if (seg_offset == seg_size) {
            segment = &(*segments)[++seg_num];
            seg_offset = 0;
        }
    };
    if constexpr (std::is_same_v<T, bool>) {
        // bool has no unsigned counterpart (std::make_unsigned<bool> is ill-formed); it is
        // written as a single 0/1 byte, matching the bool branch of GetValueSlowly().
        move_to_next_segment_if_at_end();
        segment->Put(seg_offset, static_cast<char>(value));
    } else {
        // Shift an unsigned copy of the value: right-shifting a negative signed value is
        // implementation-defined before C++20, whereas unsigned shifts are fully defined.
        using UnsignedT = std::make_unsigned_t<T>;
        constexpr size_t kWidth = sizeof(T);
        const UnsignedT bits = static_cast<UnsignedT>(value);
        const bool little_endian = SystemByteOrder() == ByteOrder::PAIMON_LITTLE_ENDIAN;
        for (size_t i = 0; i < kWidth; i++) {
            move_to_next_segment_if_at_end();
            // Index (counted from the least significant byte) of the byte emitted next.
            const size_t byte_index = little_endian ? i : kWidth - 1 - i;
            segment->Put(seg_offset, static_cast<char>(bits >> (byte_index * 8)));
            seg_offset++;
        }
    }
}
template <typename T>
inline T MemorySegmentUtils::GetValueFromMultiSegments(const std::vector<MemorySegment>& segments,
int32_t offset) {
int32_t seg_size = segments[0].Size();
int32_t seg_index = offset / seg_size;
int32_t seg_offset = offset - seg_index * seg_size; // equal to %
if (seg_offset <= seg_size - static_cast<int32_t>(sizeof(T))) {
return segments[seg_index].GetValue<T>(seg_offset);
} else {
return GetValueSlowly<T>(segments, seg_size, seg_index, seg_offset);
}
}
/// float specialization: reads a float at logical position `offset` of `segments`.
///
/// The size of the first segment is used as the size of every segment. A value crossing
/// the segment end is assembled as an int32_t by GetValueSlowly<int32_t>() and its bytes
/// are then copied into a float.
template <>
inline float MemorySegmentUtils::GetValueFromMultiSegments(
    const std::vector<MemorySegment>& segments, int32_t offset) {
    constexpr int32_t kWidth = static_cast<int32_t>(sizeof(float));
    const int32_t segment_size = segments[0].Size();
    const int32_t index = offset / segment_size;
    // Same result as offset % segment_size.
    const int32_t pos_in_segment = offset - index * segment_size;
    if (pos_in_segment > segment_size - kWidth) {
        const int32_t raw_bits =
            GetValueSlowly<int32_t>(segments, segment_size, index, pos_in_segment);
        float result;
        std::memcpy(&result, &raw_bits, sizeof(float));
        return result;
    }
    return segments[index].GetValue<float>(pos_in_segment);
}
/// double specialization: reads a double at logical position `offset` of `segments`.
///
/// The size of the first segment is used as the size of every segment. A value crossing
/// the segment end is assembled as an int64_t by GetValueSlowly<int64_t>() and its bytes
/// are then copied into a double.
template <>
inline double MemorySegmentUtils::GetValueFromMultiSegments(
    const std::vector<MemorySegment>& segments, int32_t offset) {
    constexpr int32_t kWidth = static_cast<int32_t>(sizeof(double));
    const int32_t segment_size = segments[0].Size();
    const int32_t index = offset / segment_size;
    // Same result as offset % segment_size.
    const int32_t pos_in_segment = offset - index * segment_size;
    if (pos_in_segment > segment_size - kWidth) {
        const int64_t raw_bits =
            GetValueSlowly<int64_t>(segments, segment_size, index, pos_in_segment);
        double result;
        std::memcpy(&result, &raw_bits, sizeof(double));
        return result;
    }
    return segments[index].GetValue<double>(pos_in_segment);
}
template <typename T>
inline T MemorySegmentUtils::GetValueSlowly(const std::vector<MemorySegment>& segments,
int32_t seg_size, int32_t seg_num, int32_t seg_offset) {
MemorySegment segment = segments[seg_num];
if constexpr (std::is_same_v<T, bool>) {
if (seg_offset == seg_size) {
segment = segments[++seg_num];
seg_offset = 0;
}
return static_cast<bool>(static_cast<uint8_t>(segment.Get(seg_offset)));
} else {
using UnsignedT = std::make_unsigned_t<T>;
UnsignedT ret = 0;
for (size_t i = 0; i < sizeof(T); i++) {
if (seg_offset == seg_size) {
segment = segments[++seg_num];
seg_offset = 0;
}
UnsignedT unsigned_byte = static_cast<uint8_t>(segment.Get(seg_offset));
if (SystemByteOrder() == ByteOrder::PAIMON_LITTLE_ENDIAN) {
ret |= (unsigned_byte << (i * 8));
} else {
int32_t shift_count = sizeof(T) - 1;
ret |= (unsigned_byte << ((shift_count - i) * 8));
}
seg_offset++;
}
return static_cast<T>(ret);
}
}
/// Write `size_in_bytes` bytes, starting at logical position `offset` of `segments`, to
/// `target`. Only the data is written, not its length.
///
/// Segments are visited in order: segments that end at or before `offset` are skipped,
/// and from the segment containing `offset` onwards each segment contributes as many
/// bytes as are still needed.
///
/// @return Status::OK() as soon as all requested bytes have been handed to `target`;
/// Status::Invalid if the segments are exhausted while bytes are still outstanding.
inline Status MemorySegmentUtils::CopyToStream(const std::vector<MemorySegment>& segments,
                                               int32_t offset, int32_t size_in_bytes,
                                               MemorySegmentOutputStream* target) {
    int32_t pending = size_in_bytes;
    int32_t read_pos = offset;
    for (const auto& current : segments) {
        const int32_t segment_size = current.Size();
        const int32_t available = segment_size - read_pos;
        if (available > 0) {
            const int32_t chunk = std::min(available, pending);
            target->Write(current, read_pos, chunk);
            pending -= chunk;
            // Every following segment is read from its beginning.
            read_pos = 0;
        } else {
            // The start position lies beyond this segment: make it relative to the next.
            read_pos -= segment_size;
        }
        if (pending == 0) {
            return Status::OK();
        }
    }
    if (pending == 0) {
        // Only reachable when `segments` is empty and nothing was requested.
        return Status::OK();
    }
    return Status::Invalid(
        fmt::format("No copy finished, this should be a bug, "
                    "The remaining length is: {}",
                    pending));
}
} // namespace paimon