blob: 77fbec8db27257a4a72733c813fb52a788dca08a [file] [log] [blame]
/*
* Copyright 2024-present Alibaba Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "paimon/common/data/binary_row.h"
#include <cstdint>
#include "paimon/common/data/binary_data_read_utils.h"
#include "paimon/common/memory/memory_segment.h"
#include "paimon/common/memory/memory_segment_utils.h"
#include "paimon/io/byte_order.h"
#include "paimon/memory/bytes.h"
#include "paimon/memory/memory_pool.h"
namespace paimon {
/// Mask that, AND-ed with a 64-bit word loaded from the start of the row, clears the byte that
/// comes first in memory and keeps the other seven bytes. AnyNull() uses it to skip the header
/// when scanning the first word of the null-bit region.
///
/// On a little-endian host the first byte in memory is the least-significant byte of the loaded
/// word, so bits 0-7 are cleared; otherwise bits 56-63 are cleared.
///
/// The constants are spelled with int64_t instead of `long` literals: shifting a `long` by 56 is
/// undefined where `long` is only 32 bits wide (e.g. LLP64 targets). The values are unchanged on
/// platforms where `long` is 64 bits.
const int64_t BinaryRow::FIRST_BYTE_ZERO =
    (SystemByteOrder() == ByteOrder::PAIMON_LITTLE_ENDIAN)
        ? ~int64_t{0xFF}                  // 0xFFFFFFFFFFFFFF00
        : int64_t{0x00FFFFFFFFFFFFFF};    // == ~(0xFF << 56)
/// Returns a reference to a shared zero-field row.
///
/// The row is produced by GetEmptyRow() the first time this function is called; later calls
/// return the same function-local static instance.
const BinaryRow& BinaryRow::EmptyRow() {
    static const BinaryRow shared_empty_row = GetEmptyRow();
    return shared_empty_row;
}
/// Constructs a row with `arity` fields.
///
/// Stores the field count and caches the byte width of the header/null-bit region as computed
/// by CalculateBitSetWidthInBytes(arity).
///
/// @param arity Number of fields; must be non-negative (checked with assert only).
BinaryRow::BinaryRow(int32_t arity)
: arity_(arity), null_bits_size_in_bytes_(CalculateBitSetWidthInBytes(arity)) {
// A negative field count is a caller bug; the check is compiled out when NDEBUG is defined.
assert(arity_ >= 0);
}
/// Returns the size, in bytes, of the leading region that stores the header bits followed by one
/// null bit per field.
///
/// The bit count (arity + HEADER_SIZE_IN_BITS) is rounded up to a whole number of 64-bit words,
/// so the result is always a multiple of 8.
///
/// @param arity Number of fields in the row.
int32_t BinaryRow::CalculateBitSetWidthInBytes(int32_t arity) {
    // Adding 63 before the integer division rounds the bit count up to the next full word.
    const auto word_count = (arity + 63 + HEADER_SIZE_IN_BITS) / 64;
    return word_count * 8;
}
/// Returns the size, in bytes, of the fixed-length part of a row with `arity` fields: the
/// header/null-bit region plus one 8-byte slot per field.
///
/// @param arity Number of fields in the row.
int32_t BinaryRow::CalculateFixPartSizeInBytes(int32_t arity) {
    const int32_t bit_set_bytes = CalculateBitSetWidthInBytes(arity);
    const int32_t field_slot_bytes = 8 * arity;
    return bit_set_bytes + field_slot_bytes;
}
/// Returns the offset of field `pos`'s 8-byte slot, expressed in the same coordinate space as
/// `offset_` (the row's start offset).
///
/// Slots start right after the header/null-bit region and are laid out 8 bytes apart.
/// No bounds check is performed on `pos` here.
int32_t BinaryRow::GetFieldOffset(int32_t pos) const {
    const auto first_slot_offset = offset_ + null_bits_size_in_bytes_;
    return first_slot_offset + pos * 8;
}
/// Debug-only check that `index` addresses an existing field, i.e. 0 <= index < arity_.
///
/// Implemented with assert, so it does nothing when NDEBUG is defined.
void BinaryRow::AssertIndexIsValid(int32_t index) const {
assert(index >= 0);
assert(index < arity_);
}
/// Returns the size, in bytes, of this row's fixed-length part: the cached header/null-bit
/// region width plus 8 bytes for each of the `arity_` fields.
int32_t BinaryRow::GetFixedLengthPartSize() const {
    const auto field_slot_bytes = 8 * arity_;
    return null_bits_size_in_bytes_ + field_slot_bytes;
}
/// Builds a fresh zero-field row backed by its own buffer.
///
/// The buffer is sized to the row's fixed-length part, allocated from the default memory pool,
/// wrapped in a memory segment, and attached to the row at offset 0.
BinaryRow BinaryRow::GetEmptyRow() {
    BinaryRow empty(0);
    const int32_t fixed_part_size = empty.GetFixedLengthPartSize();
    auto buffer = Bytes::AllocateBytes(fixed_part_size, GetDefaultPool().get());
    empty.PointTo(MemorySegment::Wrap(std::move(buffer)), 0, fixed_part_size);
    return empty;
}
/// Reads the byte stored at the very start of the row and converts it to a RowKind via
/// RowKind::FromByteValue, forwarding whatever result that conversion yields.
Result<const RowKind*> BinaryRow::GetRowKind() const {
    const char header_byte = MemorySegmentUtils::GetValue<char>(segments_, offset_);
    return RowKind::FromByteValue(header_byte);
}
void BinaryRow::SetRowKind(const RowKind* kind) {
MemorySegmentUtils::SetValue<char>(&segments_, offset_, kind->ToByteValue());
}
void BinaryRow::SetTotalSize(int32_t size_in_bytes) {
size_in_bytes_ = size_in_bytes;
}
/// Returns whether the null bit of field `pos` is set.
///
/// The field's bit lives at index `pos + HEADER_SIZE_IN_BITS`, counted from the row start,
/// because the header bits come first.
bool BinaryRow::IsNullAt(int32_t pos) const {
    AssertIndexIsValid(pos);
    const auto bit_index = pos + HEADER_SIZE_IN_BITS;
    return MemorySegmentUtils::BitGet(segments_, offset_, bit_index);
}
/// Clears the null bit of field `i`, marking the field as non-null.
///
/// The field's slot contents are left as they are.
void BinaryRow::SetNotNullAt(int32_t i) {
    AssertIndexIsValid(i);
    const auto bit_index = i + HEADER_SIZE_IN_BITS;
    MemorySegmentUtils::BitUnSet(&segments_, offset_, bit_index);
}
/// Marks field `i` as null: sets its null bit and zeroes its whole 8-byte slot.
void BinaryRow::SetNullAt(int32_t i) {
    AssertIndexIsValid(i);
    const auto bit_index = i + HEADER_SIZE_IN_BITS;
    MemorySegmentUtils::BitSet(&segments_, offset_, bit_index);
    // The fixed-length slot has to be zeroed as well:
    //  1. Only fixed-length types (int/long/boolean/...) go through SetNullAt.
    //  2. Equality and hashing operate on the raw bytes, so null slots need a canonical value.
    const int32_t slot_offset = GetFieldOffset(i);
    MemorySegmentUtils::SetValue<int64_t>(&segments_, slot_offset, 0);
}
/// Marks field `pos` as non-null and writes `value` as an int32_t at the start of its slot.
void BinaryRow::SetInt(int32_t pos, int32_t value) {
    AssertIndexIsValid(pos);
    SetNotNullAt(pos);
    const int32_t slot_offset = GetFieldOffset(pos);
    MemorySegmentUtils::SetValue<int32_t>(&segments_, slot_offset, value);
}
/// Marks field `pos` as non-null and writes `value` as an int64_t into its slot.
void BinaryRow::SetLong(int32_t pos, int64_t value) {
    AssertIndexIsValid(pos);
    SetNotNullAt(pos);
    const int32_t slot_offset = GetFieldOffset(pos);
    MemorySegmentUtils::SetValue<int64_t>(&segments_, slot_offset, value);
}
void BinaryRow::SetDouble(int32_t pos, double value) {
AssertIndexIsValid(pos);
SetNotNullAt(pos);
MemorySegmentUtils::SetValue<double>(&segments_, GetFieldOffset(pos), value);
}
void BinaryRow::SetBoolean(int32_t pos, bool value) {
AssertIndexIsValid(pos);
SetNotNullAt(pos);
MemorySegmentUtils::SetValue<bool>(&segments_, GetFieldOffset(pos), value);
}
/// Marks field `pos` as non-null and writes `value` as an int16_t at the start of its slot.
void BinaryRow::SetShort(int32_t pos, int16_t value) {
    AssertIndexIsValid(pos);
    SetNotNullAt(pos);
    const int32_t slot_offset = GetFieldOffset(pos);
    MemorySegmentUtils::SetValue<int16_t>(&segments_, slot_offset, value);
}
void BinaryRow::SetByte(int32_t pos, char value) {
AssertIndexIsValid(pos);
SetNotNullAt(pos);
MemorySegmentUtils::SetValue<char>(&segments_, GetFieldOffset(pos), value);
}
void BinaryRow::SetFloat(int32_t pos, float value) {
AssertIndexIsValid(pos);
SetNotNullAt(pos);
MemorySegmentUtils::SetValue<float>(&segments_, GetFieldOffset(pos), value);
}
bool BinaryRow::GetBoolean(int32_t pos) const {
AssertIndexIsValid(pos);
return MemorySegmentUtils::GetValue<bool>(segments_, GetFieldOffset(pos));
}
char BinaryRow::GetByte(int32_t pos) const {
AssertIndexIsValid(pos);
return MemorySegmentUtils::GetValue<char>(segments_, GetFieldOffset(pos));
}
/// Reads an int16_t from the start of field `pos`'s slot. The null bit is not consulted.
int16_t BinaryRow::GetShort(int32_t pos) const {
    AssertIndexIsValid(pos);
    const int32_t slot_offset = GetFieldOffset(pos);
    return MemorySegmentUtils::GetValue<int16_t>(segments_, slot_offset);
}
/// Reads an int32_t from the start of field `pos`'s slot. The null bit is not consulted.
int32_t BinaryRow::GetInt(int32_t pos) const {
    AssertIndexIsValid(pos);
    const int32_t slot_offset = GetFieldOffset(pos);
    return MemorySegmentUtils::GetValue<int32_t>(segments_, slot_offset);
}
/// Returns the date stored in field `pos`.
///
/// A date shares the int32_t representation of an integer field, so this simply forwards to
/// GetInt().
int32_t BinaryRow::GetDate(int32_t pos) const {
    const int32_t date_value = GetInt(pos);
    return date_value;
}
/// Reads the full 8-byte slot of field `pos` as an int64_t. The null bit is not consulted.
int64_t BinaryRow::GetLong(int32_t pos) const {
    AssertIndexIsValid(pos);
    const int32_t slot_offset = GetFieldOffset(pos);
    return MemorySegmentUtils::GetValue<int64_t>(segments_, slot_offset);
}
float BinaryRow::GetFloat(int32_t pos) const {
AssertIndexIsValid(pos);
return MemorySegmentUtils::GetValue<float>(segments_, GetFieldOffset(pos));
}
double BinaryRow::GetDouble(int32_t pos) const {
AssertIndexIsValid(pos);
return MemorySegmentUtils::GetValue<double>(segments_, GetFieldOffset(pos));
}
/// Returns the string stored in field `pos`.
///
/// The field's 8-byte slot is read as an int64_t and handed, together with the row's base
/// offset and the slot's offset, to BinaryDataReadUtils::ReadBinaryString, which decodes it.
BinaryString BinaryRow::GetString(int32_t pos) const {
    AssertIndexIsValid(pos);
    const int32_t slot_offset = GetFieldOffset(pos);
    const auto slot_value = MemorySegmentUtils::GetValue<int64_t>(segments_, slot_offset);
    return BinaryDataReadUtils::ReadBinaryString(segments_, offset_, slot_offset, slot_value);
}
/// Returns the decimal stored in field `pos`.
///
/// When Decimal::IsCompact(precision) holds, the slot's int64_t is itself the unscaled value.
/// Otherwise the slot's int64_t is passed to BinaryDataReadUtils::ReadDecimal, which resolves
/// the value relative to the row's base offset.
///
/// @param pos       Field index.
/// @param precision Decimal precision; selects between the two storage forms.
/// @param scale     Decimal scale, forwarded to the constructed value.
Decimal BinaryRow::GetDecimal(int32_t pos, int32_t precision, int32_t scale) const {
    AssertIndexIsValid(pos);
    // Both storage forms begin by reading the same 8-byte slot.
    const auto slot_value =
        MemorySegmentUtils::GetValue<int64_t>(segments_, GetFieldOffset(pos));
    if (!Decimal::IsCompact(precision)) {
        return BinaryDataReadUtils::ReadDecimal(segments_, offset_, slot_value, precision,
                                                scale);
    }
    return Decimal::FromUnscaledLong(slot_value, precision, scale);
}
/// Returns the timestamp stored in field `pos`.
///
/// When Timestamp::IsCompact(precision) holds, the slot's int64_t is interpreted as epoch
/// milliseconds. Otherwise the slot's int64_t is passed to
/// BinaryDataReadUtils::ReadTimestampData, which resolves the value relative to the row's base
/// offset.
///
/// @param pos       Field index.
/// @param precision Timestamp precision; selects between the two storage forms.
Timestamp BinaryRow::GetTimestamp(int32_t pos, int32_t precision) const {
    AssertIndexIsValid(pos);
    // Both storage forms begin by reading the same 8-byte slot.
    const auto slot_value =
        MemorySegmentUtils::GetValue<int64_t>(segments_, GetFieldOffset(pos));
    if (!Timestamp::IsCompact(precision)) {
        return BinaryDataReadUtils::ReadTimestampData(segments_, offset_, slot_value);
    }
    return Timestamp::FromEpochMillis(slot_value);
}
std::shared_ptr<Bytes> BinaryRow::GetBinary(int32_t pos) const {
AssertIndexIsValid(pos);
int32_t field_offset = GetFieldOffset(pos);
const auto offset_and_len = MemorySegmentUtils::GetValue<int64_t>(segments_, field_offset);
return BinarySection::ReadBinary(segments_, offset_, field_offset, offset_and_len,
GetDefaultPool().get());
}
/// Returns the array stored in field `pos`.
///
/// The slot's int64_t (obtained through GetLong) is decoded by
/// BinaryDataReadUtils::ReadArrayData relative to the row's base offset.
std::shared_ptr<InternalArray> BinaryRow::GetArray(int32_t pos) const {
    AssertIndexIsValid(pos);
    const int64_t slot_value = GetLong(pos);
    return BinaryDataReadUtils::ReadArrayData(segments_, offset_, slot_value);
}
/// Placeholder for reading a map field: validates `pos` (debug builds only) and always returns
/// a null pointer.
std::shared_ptr<InternalMap> BinaryRow::GetMap(int32_t pos) const {
    AssertIndexIsValid(pos);
    // TODO(liancheng.lsz): map fields are not decoded yet.
    return nullptr;
}
/// Returns the nested row stored in field `pos`.
///
/// The slot's int64_t (obtained through GetLong) is decoded by
/// BinaryDataReadUtils::ReadRowData relative to the row's base offset.
///
/// @param pos        Field index.
/// @param num_fields Field count of the nested row, forwarded to the reader.
std::shared_ptr<InternalRow> BinaryRow::GetRow(int32_t pos, int32_t num_fields) const {
    AssertIndexIsValid(pos);
    const int64_t slot_value = GetLong(pos);
    return BinaryDataReadUtils::ReadRowData(segments_, num_fields, offset_, slot_value);
}
/// Returns true when at least one bit of the null-bit region is set.
///
/// A field's bit is 1 when the field is null; the default is 0. The region is scanned one
/// 64-bit word at a time.
bool BinaryRow::AnyNull() const {
    // The first word also holds the header, so mask the header byte out before testing.
    const auto first_word = MemorySegmentUtils::GetValue<int64_t>(segments_, offset_);
    if ((first_word & FIRST_BYTE_ZERO) != 0) {
        return true;
    }
    // Every remaining word consists of null bits only.
    int32_t byte_pos = 8;
    while (byte_pos < null_bits_size_in_bytes_) {
        const auto word = MemorySegmentUtils::GetValue<int64_t>(segments_, offset_ + byte_pos);
        if (word != 0) {
            return true;
        }
        byte_pos += 8;
    }
    return false;
}
/// Returns true when at least one of the listed fields is null.
///
/// Fields are tested in the given order with IsNullAt() and the scan stops at the first null
/// one. An empty list yields false.
///
/// @param fields Indices of the fields to test.
bool BinaryRow::AnyNull(const std::vector<int32_t>& fields) const {
    for (auto it = fields.begin(); it != fields.end(); ++it) {
        if (IsNullAt(*it)) {
            return true;
        }
    }
    return false;
}
/// Returns a new row with the same arity whose contents are copied from this row.
///
/// @param pool Memory pool forwarded to the copy routine.
BinaryRow BinaryRow::Copy(MemoryPool* pool) const {
    BinaryRow duplicate(arity_);
    Copy(&duplicate, pool);
    return duplicate;
}
/// Copies this row's contents into `reuse`; a thin forwarder to CopyInternal().
///
/// @param reuse Destination row, re-pointed at the copied bytes.
/// @param pool  Memory pool forwarded to the copy routine.
void BinaryRow::Copy(BinaryRow* reuse, MemoryPool* pool) const {
    this->CopyInternal(reuse, pool);
}
/// Copies this row's `size_in_bytes_` bytes, starting at `offset_`, into a newly obtained
/// byte buffer and points `reuse` at that buffer with offset 0 and the same size.
///
/// @param reuse Destination row; dereferenced unconditionally, so it must not be null.
/// @param pool  Memory pool passed to MemorySegmentUtils::CopyToBytes.
void BinaryRow::CopyInternal(BinaryRow* reuse, MemoryPool* pool) const {
    std::shared_ptr<Bytes> copied_bytes =
        MemorySegmentUtils::CopyToBytes(segments_, offset_, size_in_bytes_, pool);
    reuse->PointTo(MemorySegment::Wrap(copied_bytes), 0, size_in_bytes_);
}
/// Detaches the row from its backing memory: resets the recorded size and offset to zero and
/// empties the segment list. The arity is left unchanged.
void BinaryRow::Clear() {
    size_in_bytes_ = 0;
    offset_ = 0;
    segments_.clear();
}
/// Compares two rows by their raw bytes.
///
/// Rows are equal when they are the same object, or when their sizes match and
/// MemorySegmentUtils::Equals reports identical content over `size_in_bytes_` bytes starting at
/// each row's own offset.
bool BinaryRow::operator==(const BinaryRow& other) const {
    // Same object: nothing to compare.
    if (this == &other) {
        return true;
    }
    // Different lengths can never match; skip the byte comparison.
    if (size_in_bytes_ != other.size_in_bytes_) {
        return false;
    }
    return MemorySegmentUtils::Equals(segments_, offset_, other.segments_, other.offset_,
                                      size_in_bytes_);
}
/// Returns the hash of the row's `size_in_bytes_` bytes starting at `offset_`, as computed by
/// MemorySegmentUtils::HashByWords.
///
/// The row must be backed by at least one segment (checked with assert only). With exactly one
/// segment no memory pool is passed to the hash routine; with more than one, the default pool
/// is supplied.
int32_t BinaryRow::HashCode() const {
    assert(segments_.size() > 0);
    if (segments_.size() != 1) {
        // Multi-segment rows: hand the hash routine the default pool.
        return MemorySegmentUtils::HashByWords(segments_, offset_, size_in_bytes_,
                                               GetDefaultPool().get());
    }
    return MemorySegmentUtils::HashByWords(segments_, offset_, size_in_bytes_, nullptr);
}
} // namespace paimon