blob: 15c054553cc3e29b65ee92153a0fc9604c96c91d [file]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
#include "paimon/common/memory/memory_segment_utils.h"
#include <cassert>
#include "paimon/common/utils/murmurhash_utils.h"
namespace paimon {
std::shared_ptr<Bytes> MemorySegmentUtils::AllocateBytes(int32_t length, MemoryPool* pool) {
return Bytes::AllocateBytes(length, pool);
}
void MemorySegmentUtils::CopyFromBytes(std::vector<MemorySegment>* segments, int32_t offset,
const Bytes& bytes, int32_t bytes_offset,
int32_t num_bytes) {
if (segments->size() == 1) {
(*segments)[0].Put(offset, bytes, bytes_offset, num_bytes);
} else {
CopyMultiSegmentsFromBytes(segments, offset, bytes, bytes_offset, num_bytes);
}
}
void MemorySegmentUtils::CopyMultiSegmentsFromBytes(std::vector<MemorySegment>* segments,
int32_t offset, const Bytes& bytes,
int32_t bytes_offset, int32_t num_bytes) {
int32_t remain_size = num_bytes;
for (auto& segment : (*segments)) {
int32_t remain = segment.Size() - offset;
if (remain > 0) {
int32_t n_copy = std::min(remain, remain_size);
segment.Put(offset, bytes, num_bytes - remain_size + bytes_offset, n_copy);
remain_size -= n_copy;
// next new segment.
offset = 0;
if (remain_size == 0) {
return;
}
} else {
// remain is negative, let's advance to next segment
// now the offset = offset - segmentSize (-remain)
offset = -remain;
}
}
}
// Allocates a fresh `Bytes` of `num_bytes` from `pool` and fills it, starting at position 0,
// with the `num_bytes` bytes found at `offset` in `segments` (via the CopyToBytes overload
// that takes a destination buffer). `pool` must be non-null (checked by assert).
PAIMON_UNIQUE_PTR<Bytes> MemorySegmentUtils::CopyToBytes(const std::vector<MemorySegment>& segments,
                                                         int32_t offset, int32_t num_bytes,
                                                         MemoryPool* pool) {
    assert(pool);
    auto result = Bytes::AllocateBytes(num_bytes, pool);
    // Destination offset is 0: the new buffer is filled from its start.
    CopyToBytes(segments, offset, result.get(), 0, num_bytes);
    return result;
}
// Copies `num_bytes` bytes at `offset` in `segments` to the raw memory `target`.
// When the range fits in the first segment it is copied directly from it; otherwise the
// copy is delegated to CopyMultiSegmentsToUnsafe.
void MemorySegmentUtils::CopyToUnsafe(const std::vector<MemorySegment>& segments, int32_t offset,
                                      void* target, int32_t num_bytes) {
    const bool fits_in_first = InFirstSegment(segments, offset, num_bytes);
    if (!fits_in_first) {
        CopyMultiSegmentsToUnsafe(segments, offset, target, num_bytes);
        return;
    }
    segments[0].CopyToUnsafe(offset, target, 0, num_bytes);
}
void MemorySegmentUtils::CopyMultiSegmentsToUnsafe(const std::vector<MemorySegment>& segments,
int32_t offset, void* target,
int32_t num_bytes) {
int32_t remain_size = num_bytes;
for (const auto& segment : segments) {
int32_t remain = segment.Size() - offset;
if (remain > 0) {
int32_t n_copy = std::min(remain, remain_size);
segment.CopyToUnsafe(offset, target, num_bytes - remain_size, n_copy);
remain_size -= n_copy;
// next new segment.
offset = 0;
if (remain_size == 0) {
return;
}
} else {
// remain is negative, let's advance to next segment
// now the offset = offset - segmentSize (-remain)
offset = -remain;
}
}
}
std::shared_ptr<Bytes> MemorySegmentUtils::GetBytes(const std::vector<MemorySegment>& segments,
int32_t base_offset, int32_t size_in_bytes,
MemoryPool* pool) {
// avoid copy if `base` is `byte[]`
if (segments.size() == 1) {
std::shared_ptr<Bytes> heap_memory = segments[0].GetOrCreateHeapMemory(pool);
if (base_offset == 0 && heap_memory != nullptr &&
static_cast<int32_t>(heap_memory->size()) == size_in_bytes) {
return heap_memory;
} else {
std::shared_ptr<Bytes> bytes = Bytes::AllocateBytes(size_in_bytes, pool);
segments[0].Get(base_offset, bytes.get(), 0, size_in_bytes);
return bytes;
}
} else {
std::shared_ptr<Bytes> bytes = Bytes::AllocateBytes(size_in_bytes, pool);
CopyMultiSegmentsToBytes(segments, base_offset, bytes.get(), 0, size_in_bytes);
return bytes;
}
}
// True when the range [offset, offset + num_bytes) ends no later than the end of the first
// segment. `segments` must contain at least one element, since element 0 is read
// unconditionally.
bool MemorySegmentUtils::InFirstSegment(const std::vector<MemorySegment>& segments, int32_t offset,
                                        int32_t num_bytes) {
    const int32_t range_end = num_bytes + offset;
    return range_end <= segments[0].Size();
}
// Converts a bit index into the index of the byte holding that bit by shifting right by
// ADDRESS_BITS_PER_WORD. The value is shifted as unsigned so the shift fills with zeros.
int32_t MemorySegmentUtils::ByteIndex(int32_t bit_index) {
    const uint32_t unsigned_index = static_cast<uint32_t>(bit_index);
    return unsigned_index >> ADDRESS_BITS_PER_WORD;
}
// Clears bit `index` of the bit set that starts at `base_offset` in `segment`:
// reads the byte holding the bit, masks the bit out, and writes the byte back.
void MemorySegmentUtils::BitUnSet(MemorySegment* segment, int32_t base_offset, int32_t index) {
    const int32_t byte_pos = base_offset + ByteIndex(index);
    // All ones except the target bit within the byte.
    const char clear_mask = static_cast<char>(~(1u << (index & BIT_BYTE_INDEX_MASK)));
    char value = segment->Get(byte_pos);
    value &= clear_mask;
    segment->Put(byte_pos, value);
}
// Sets bit `index` of the bit set that starts at `base_offset` in `segment`:
// reads the byte holding the bit, ORs the bit in, and writes the byte back.
void MemorySegmentUtils::BitSet(MemorySegment* segment, int32_t base_offset, int32_t index) {
    const int32_t byte_pos = base_offset + ByteIndex(index);
    // Only the target bit within the byte.
    const char set_mask = static_cast<char>(1u << (index & BIT_BYTE_INDEX_MASK));
    char value = segment->Get(byte_pos);
    value |= set_mask;
    segment->Put(byte_pos, value);
}
// Reports whether bit `index` of the bit set that starts at `base_offset` in `segment` is set.
bool MemorySegmentUtils::BitGet(const MemorySegment& segment, int32_t base_offset, int32_t index) {
    const int32_t byte_pos = base_offset + ByteIndex(index);
    const char probe = static_cast<char>(1u << (index & BIT_BYTE_INDEX_MASK));
    const char value = segment.Get(byte_pos);
    return (value & probe) != 0;
}
void MemorySegmentUtils::BitSet(std::vector<MemorySegment>* segments, int32_t base_offset,
int32_t index) {
if (segments->size() == 1) {
int32_t offset = base_offset + ByteIndex(index);
MemorySegment& segment = (*segments)[0];
char current = segment.Get(offset);
current |= static_cast<char>(1u << (index & BIT_BYTE_INDEX_MASK));
segment.Put(offset, current);
} else {
BitSetMultiSegments(segments, base_offset, index);
}
}
void MemorySegmentUtils::BitSetMultiSegments(std::vector<MemorySegment>* segments,
int32_t base_offset, int32_t index) {
int32_t offset = base_offset + ByteIndex(index);
int32_t seg_size = (*segments)[0].Size();
int32_t seg_index = offset / seg_size;
int32_t seg_offset = offset - seg_index * seg_size; // equal to %
MemorySegment& segment = (*segments)[seg_index];
char current = segment.Get(seg_offset);
current |= static_cast<char>(1u << (index & BIT_BYTE_INDEX_MASK));
segment.Put(seg_offset, current);
}
// Reports whether bit `index` of the bit set that starts at `base_offset` in `segments` is
// set. The byte holding the bit is read with GetValue<char>.
bool MemorySegmentUtils::BitGet(const std::vector<MemorySegment>& segments, int32_t base_offset,
                                int32_t index) {
    const int32_t byte_pos = base_offset + ByteIndex(index);
    const char probe = static_cast<char>(1u << (index & BIT_BYTE_INDEX_MASK));
    const char value = GetValue<char>(segments, byte_pos);
    return (value & probe) != 0;
}
void MemorySegmentUtils::BitUnSet(std::vector<MemorySegment>* segments, int32_t base_offset,
int32_t index) {
if (segments->size() == 1) {
MemorySegment& segment = (*segments)[0];
int32_t offset = base_offset + ByteIndex(index);
char current = segment.Get(offset);
current &= static_cast<char>(~(1u << (index & BIT_BYTE_INDEX_MASK)));
segment.Put(offset, current);
} else {
BitUnSetMultiSegments(segments, base_offset, index);
}
}
void MemorySegmentUtils::BitUnSetMultiSegments(std::vector<MemorySegment>* segments,
int32_t base_offset, int32_t index) {
int32_t offset = base_offset + ByteIndex(index);
int32_t seg_size = (*segments)[0].Size();
int32_t seg_index = offset / seg_size;
int32_t seg_offset = offset - seg_index * seg_size; // equal to %
MemorySegment& segment = (*segments)[seg_index];
char current = segment.Get(seg_offset);
current &= static_cast<char>(~(1u << (index & BIT_BYTE_INDEX_MASK)));
segment.Put(seg_offset, current);
}
// Compares `len` bytes at `offset1` in `segments1` with `len` bytes at `offset2` in
// `segments2`. When both ranges fit in their first segment the two segments are compared
// directly; otherwise the comparison is delegated to EqualsMultiSegments.
bool MemorySegmentUtils::Equals(const std::vector<MemorySegment>& segments1, int32_t offset1,
                                const std::vector<MemorySegment>& segments2, int32_t offset2,
                                int32_t len) {
    const bool both_in_first =
        InFirstSegment(segments1, offset1, len) && InFirstSegment(segments2, offset2, len);
    if (!both_in_first) {
        return EqualsMultiSegments(segments1, offset1, segments2, offset2, len);
    }
    return segments1[0].EqualTo(segments2[0], offset1, offset2, len);
}
// Compares `len` bytes at `offset1` in `segments1` with `len` bytes at `offset2` in
// `segments2`, where either range may span several segments. Each side is addressed using
// the size of its own first segment. The ranges are compared piece by piece; each piece is
// as long as possible without crossing a segment boundary on either side.
bool MemorySegmentUtils::EqualsMultiSegments(const std::vector<MemorySegment>& segments1,
                                             int32_t offset1,
                                             const std::vector<MemorySegment>& segments2,
                                             int32_t offset2, int32_t len) {
    // Nothing to compare; returning here also avoids dividing by a zero segment size.
    if (len == 0) {
        return true;
    }

    const int32_t size1 = segments1[0].Size();
    const int32_t size2 = segments2[0].Size();

    // Starting segment and the position inside it (the remainder of the division).
    int32_t index1 = offset1 / size1;
    int32_t index2 = offset2 / size2;
    int32_t pos1 = offset1 - size1 * index1;
    int32_t pos2 = offset2 - size2 * index2;

    // Moves a cursor forward by `step`, rolling over to the next segment at the boundary.
    auto advance = [](int32_t step, int32_t segment_size, int32_t* pos, int32_t* segment_index) {
        *pos += step;
        if (*pos == segment_size) {
            *pos = 0;
            ++(*segment_index);
        }
    };

    int32_t left = len;
    while (left > 0) {
        const int32_t piece = std::min(std::min(left, size1 - pos1), size2 - pos2);
        if (!segments1[index1].EqualTo(segments2[index2], pos1, pos2, piece)) {
            return false;
        }
        left -= piece;
        advance(piece, size1, &pos1, &index1);
        advance(piece, size2, &pos2, &index2);
    }
    return true;
}
int32_t MemorySegmentUtils::Find(const std::vector<MemorySegment>& segments1, int32_t offset1,
int32_t num_bytes1, const std::vector<MemorySegment>& segments2,
int32_t offset2, int32_t num_bytes2) {
if (num_bytes2 == 0) { // quick way 1.
return offset1;
}
if (InFirstSegment(segments1, offset1, num_bytes1) &&
InFirstSegment(segments2, offset2, num_bytes2)) {
char first = segments2[0].Get(offset2);
int32_t end = num_bytes1 - num_bytes2 + offset1;
for (int32_t i = offset1; i <= end; i++) {
// quick way 2: equal first byte.
if (segments1[0].Get(i) == first &&
segments1[0].EqualTo(segments2[0], i, offset2, num_bytes2)) {
return i;
}
}
return -1;
} else {
return FindInMultiSegments(segments1, offset1, num_bytes1, segments2, offset2, num_bytes2);
}
}
// Multi-segment variant of Find: tries every start position from `offset1` up to the last
// one where `num_bytes2` bytes still fit, comparing with EqualsMultiSegments. Returns the
// first matching offset in `segments1`, or -1 when none matches.
int32_t MemorySegmentUtils::FindInMultiSegments(const std::vector<MemorySegment>& segments1,
                                                int32_t offset1, int32_t num_bytes1,
                                                const std::vector<MemorySegment>& segments2,
                                                int32_t offset2, int32_t num_bytes2) {
    const int32_t last_start = num_bytes1 - num_bytes2 + offset1;
    int32_t pos = offset1;
    while (pos <= last_start) {
        const bool matches = EqualsMultiSegments(segments1, pos, segments2, offset2, num_bytes2);
        if (matches) {
            return pos;
        }
        ++pos;
    }
    return -1;
}
// Hashes the `num_bytes` bytes at `offset` in `segments`. A range that fits in the first
// segment is hashed in place with MurmurHashUtils::HashBytes; otherwise HashMultiSeg is used
// and receives `pool`.
int32_t MemorySegmentUtils::Hash(const std::vector<MemorySegment>& segments, int32_t offset,
                                 int32_t num_bytes, MemoryPool* pool) {
    const bool fits_in_first = InFirstSegment(segments, offset, num_bytes);
    if (!fits_in_first) {
        return HashMultiSeg(segments, offset, num_bytes, pool);
    }
    return MurmurHashUtils::HashBytes(segments[0], offset, num_bytes);
}
// Hashes the `num_bytes` bytes at `offset` in `segments` with the "by words" hash variant.
// A range that fits in the first segment is hashed in place with
// MurmurHashUtils::HashBytesByWords; otherwise HashMultiSegByWords is used and receives `pool`.
int32_t MemorySegmentUtils::HashByWords(const std::vector<MemorySegment>& segments, int32_t offset,
                                        int32_t num_bytes, MemoryPool* pool) {
    const bool fits_in_first = InFirstSegment(segments, offset, num_bytes);
    if (!fits_in_first) {
        return HashMultiSegByWords(segments, offset, num_bytes, pool);
    }
    return MurmurHashUtils::HashBytesByWords(segments[0], offset, num_bytes);
}
int32_t MemorySegmentUtils::HashMultiSegByWords(const std::vector<MemorySegment>& segments,
int32_t offset, int32_t num_bytes,
MemoryPool* pool) {
std::shared_ptr<Bytes> bytes = AllocateBytes(num_bytes, pool);
CopyMultiSegmentsToBytes(segments, offset, bytes.get(), 0, num_bytes);
return MurmurHashUtils::HashUnsafeBytesByWords(reinterpret_cast<void*>(bytes->data()), 0,
num_bytes);
}
int32_t MemorySegmentUtils::HashMultiSeg(const std::vector<MemorySegment>& segments, int32_t offset,
int32_t num_bytes, MemoryPool* pool) {
std::shared_ptr<Bytes> bytes = AllocateBytes(num_bytes, pool);
CopyMultiSegmentsToBytes(segments, offset, bytes.get(), 0, num_bytes);
return MurmurHashUtils::HashUnsafeBytes(reinterpret_cast<void*>(bytes->data()), 0, num_bytes);
}
} // namespace paimon