// blob: d893a00591f18f065476a96d57cef863606e8fa5 [file] [log] [blame]
/*
* Copyright 2024-present Alibaba Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#include <cstdint>
#include <memory>
#include <vector>
#include "paimon/common/memory/memory_segment.h"
#include "paimon/memory/bytes.h"
#include "paimon/memory/memory_pool.h"
#include "paimon/visibility.h"
namespace paimon {
/// Describe a section of memory.
class PAIMON_EXPORT BinarySection {
public:
BinarySection() = default;
BinarySection(const std::vector<MemorySegment>& segments, int32_t offset, int32_t size_in_bytes)
: segments_(segments), offset_(offset), size_in_bytes_(size_in_bytes) {}
virtual ~BinarySection() = default;
/// It decides whether to put data in FixLenPart or VarLenPart. See more in `/// BinaryRow`.
/// If len is less than 8, its binary format is: 1-bit mark(1) = 1, 7-bits len, and
/// 7-bytes data. Data is stored in fix-length part.
/// If len is greater or equal to 8, its binary format is: 1-bit mark(1) = 0,
/// 31-bits offset to the data, and 4-bytes length of data. Data is stored in
/// variable-length part.
static constexpr int32_t MAX_FIX_PART_DATA_SIZE = 7;
/// To get the mark in highest bit of int64_t. Form: 10000000 00000000 ... (8 bytes)
/// This is used to decide whether the data is stored in fixed-length part or
/// variable-length part. see `MAX_FIX_PART_DATA_SIZE` for more information.
static constexpr int64_t HIGHEST_FIRST_BIT = 0x80L << 56;
/// To get the 7 bits length in second bit to eighth bit out of a int64_t. Form:
/// 01111111 00000000... (8 bytes)
/// This is used to get the length of the data which is stored in this int64_t. see
/// `MAX_FIX_PART_DATA_SIZE` for more information.
static constexpr int64_t HIGHEST_SECOND_TO_EIGHTH_BIT = 0x7FL << 56;
/// Get binary, if len less than 8, will be include in variable_part_offset_and_len.
/// @note Need to consider the ByteOrder.
/// @param base_offset base offset of composite binary format.
/// @param field_offset absolute start offset of variable_part_offset_and_len.
/// @param variable_part_offset_and_len a long value, real data or offset and len.
static PAIMON_UNIQUE_PTR<Bytes> ReadBinary(const std::vector<MemorySegment>& segments,
int32_t base_offset, int32_t field_offset,
int64_t variable_part_offset_and_len,
MemoryPool* pool);
bool operator==(const BinarySection& that) const;
virtual void PointTo(const MemorySegment& segment, int32_t offset, int32_t size_in_bytes) {
std::vector<MemorySegment> segments = {segment};
PointTo(segments, offset, size_in_bytes);
}
virtual void PointTo(const std::vector<MemorySegment>& segments, int32_t offset,
int32_t size_in_bytes) {
segments_ = segments;
offset_ = offset;
size_in_bytes_ = size_in_bytes;
}
const std::vector<MemorySegment>& GetSegments() const {
return segments_;
}
int32_t GetOffset() const {
return offset_;
}
int32_t GetSizeInBytes() const {
return size_in_bytes_;
}
std::shared_ptr<Bytes> ToBytes(MemoryPool* pool) const;
virtual int32_t HashCode() const;
protected:
std::vector<MemorySegment> segments_;
int32_t offset_ = 0;
int32_t size_in_bytes_ = 0;
};
} // namespace paimon