blob: 29ccbd5ecc179d7c9cc183283c3da6c19a8376a8 [file] [log] [blame]
/*
* Copyright 2024-present Alibaba Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "paimon/common/data/binary_array.h"
#include <cassert>
#include <cstddef>
#include <utility>
#include "paimon/common/data/binary_array_writer.h"
#include "paimon/common/data/binary_data_read_utils.h"
#include "paimon/common/memory/memory_segment.h"
#include "paimon/memory/memory_pool.h"
namespace paimon {
// Returns the header size in bytes for an array holding `num_fields` elements:
// 4 leading bytes plus one 4-byte word for every (started) group of 32 elements.
int32_t BinaryArray::CalculateHeaderInBytes(int32_t num_fields) {
    // Round the element count up to a whole number of 32-bit words.
    const int32_t bitmap_words = (num_fields + 31) / 32;
    return 4 + bitmap_words * 4;
}
// Debug-only bounds check: `ordinal` must satisfy 0 <= ordinal < size_.
// Both checks are plain assert()s, so they vanish when NDEBUG is defined.
void BinaryArray::AssertIndexIsValid(int32_t ordinal) const {
    // Upper bound.
    assert(ordinal < size_);
    // Lower bound.
    assert(ordinal >= 0);
}
// Returns the byte offset of the slot for element `ordinal`, assuming fixed-width slots of
// `element_size` bytes laid out back to back starting at element_offset_.
int32_t BinaryArray::GetElementOffset(int32_t ordinal, int32_t element_size) const {
    const int32_t distance_from_first = ordinal * element_size;
    return element_offset_ + distance_from_first;
}
void BinaryArray::PointTo(const MemorySegment& segment, int32_t offset, int32_t size_in_bytes) {
std::vector<MemorySegment> segments = {segment};
PointTo(segments, offset, size_in_bytes);
}
// Re-targets this array at the region of `segments` that starts at `offset` and spans
// `size_in_bytes` bytes. The element count is decoded from the region itself, and the start of
// the element slots is derived from it via CalculateHeaderInBytes.
void BinaryArray::PointTo(const std::vector<MemorySegment>& segments, int32_t offset,
                          int32_t size_in_bytes) {
    // The first 4 bytes of the region hold the number of elements.
    const auto size = MemorySegmentUtils::GetValue<int32_t>(segments, offset);
    assert(size >= 0);

    segments_ = segments;
    offset_ = offset;
    size_in_bytes_ = size_in_bytes;
    size_ = size;
    // Element slots begin right after the header.
    element_offset_ = offset + CalculateHeaderInBytes(size);
}
// Returns the bit for element `pos` from the bit region that starts 4 bytes past offset_.
bool BinaryArray::IsNullAt(int32_t pos) const {
    AssertIndexIsValid(pos);
    // Skip the leading 4 bytes to reach the first bit word.
    const int32_t bits_base = offset_ + 4;
    return MemorySegmentUtils::BitGet(segments_, bits_base, pos);
}
// Reads element `pos` as an int64_t from its 8-byte slot.
int64_t BinaryArray::GetLong(int32_t pos) const {
    AssertIndexIsValid(pos);
    const int32_t slot_offset = GetElementOffset(pos, 8);
    return MemorySegmentUtils::GetValue<int64_t>(segments_, slot_offset);
}
// Reads element `pos` as an int32_t from its 4-byte slot.
int32_t BinaryArray::GetInt(int32_t pos) const {
    AssertIndexIsValid(pos);
    const int32_t slot_offset = GetElementOffset(pos, 4);
    return MemorySegmentUtils::GetValue<int32_t>(segments_, slot_offset);
}
// Reads element `pos` as a date. Dates use the same representation as GetInt, so this simply
// delegates to it (including its index check).
int32_t BinaryArray::GetDate(int32_t pos) const {
    const int32_t date_value = GetInt(pos);
    return date_value;
}
// Reads element `pos` as a BinaryString. The 8-byte slot value is handed, together with the
// array base offset and the slot's own offset, to BinaryDataReadUtils::ReadBinaryString, which
// performs the actual decoding.
BinaryString BinaryArray::GetString(int32_t pos) const {
    AssertIndexIsValid(pos);
    const int32_t slot_offset = GetElementOffset(pos, 8);
    const auto slot_value = MemorySegmentUtils::GetValue<int64_t>(segments_, slot_offset);
    return BinaryDataReadUtils::ReadBinaryString(segments_, offset_, slot_offset, slot_value);
}
// Reads element `pos` as a Decimal with the given `precision` and `scale`.
// When Decimal::IsCompact(precision) holds, the 8-byte slot value is used directly as the
// unscaled long; otherwise the slot value is passed to BinaryDataReadUtils::ReadDecimal to
// decode the decimal relative to the array base offset.
Decimal BinaryArray::GetDecimal(int32_t pos, int32_t precision, int32_t scale) const {
    AssertIndexIsValid(pos);
    // Both representations start from the same 8-byte slot.
    const auto slot_value =
        MemorySegmentUtils::GetValue<int64_t>(segments_, GetElementOffset(pos, 8));
    if (!Decimal::IsCompact(precision)) {
        return BinaryDataReadUtils::ReadDecimal(segments_, offset_, slot_value, precision, scale);
    }
    return Decimal::FromUnscaledLong(slot_value, precision, scale);
}
// Reads element `pos` as a Timestamp of the given `precision`.
// When Timestamp::IsCompact(precision) holds, the 8-byte slot value is interpreted as epoch
// milliseconds; otherwise it is passed to BinaryDataReadUtils::ReadTimestampData, which decodes
// the timestamp relative to the array base offset.
Timestamp BinaryArray::GetTimestamp(int32_t pos, int32_t precision) const {
    AssertIndexIsValid(pos);
    // Both representations start from the same 8-byte slot.
    const auto slot_value =
        MemorySegmentUtils::GetValue<int64_t>(segments_, GetElementOffset(pos, 8));
    if (!Timestamp::IsCompact(precision)) {
        return BinaryDataReadUtils::ReadTimestampData(segments_, offset_, slot_value);
    }
    return Timestamp::FromEpochMillis(slot_value);
}
// Reads element `pos` as a byte buffer. The 8-byte slot value is decoded by
// BinarySection::ReadBinary, which is given the default memory pool.
std::shared_ptr<Bytes> BinaryArray::GetBinary(int32_t pos) const {
    AssertIndexIsValid(pos);
    const int32_t slot_offset = GetElementOffset(pos, 8);
    const auto slot_value = MemorySegmentUtils::GetValue<int64_t>(segments_, slot_offset);
    return BinarySection::ReadBinary(segments_, offset_, slot_offset, slot_value,
                                     GetDefaultPool().get());
}
// Reads element `pos` as a nested array: the 8-byte slot value is passed to
// BinaryDataReadUtils::ReadArrayData along with the array base offset.
std::shared_ptr<InternalArray> BinaryArray::GetArray(int32_t pos) const {
    AssertIndexIsValid(pos);
    const int64_t slot_value = GetLong(pos);
    return BinaryDataReadUtils::ReadArrayData(segments_, offset_, slot_value);
}
// Reads element `pos` as a nested map: the 8-byte slot value is passed to
// BinaryDataReadUtils::ReadMapData along with the array base offset.
std::shared_ptr<InternalMap> BinaryArray::GetMap(int32_t pos) const {
    AssertIndexIsValid(pos);
    const int64_t slot_value = GetLong(pos);
    return BinaryDataReadUtils::ReadMapData(segments_, offset_, slot_value);
}
// Reads element `pos` as a nested row with `num_fields` fields: the 8-byte slot value is passed
// to BinaryDataReadUtils::ReadRowData along with the array base offset.
std::shared_ptr<InternalRow> BinaryArray::GetRow(int32_t pos, int32_t num_fields) const {
    AssertIndexIsValid(pos);
    const auto slot_value =
        MemorySegmentUtils::GetValue<int64_t>(segments_, GetElementOffset(pos, 8));
    return BinaryDataReadUtils::ReadRowData(segments_, num_fields, offset_, slot_value);
}
bool BinaryArray::GetBoolean(int32_t pos) const {
AssertIndexIsValid(pos);
return MemorySegmentUtils::GetValue<bool>(segments_, GetElementOffset(pos, 1));
}
char BinaryArray::GetByte(int32_t pos) const {
AssertIndexIsValid(pos);
return MemorySegmentUtils::GetValue<char>(segments_, GetElementOffset(pos, 1));
}
// Reads element `pos` as an int16_t; slots are sizeof(int16_t) bytes wide.
int16_t BinaryArray::GetShort(int32_t pos) const {
    AssertIndexIsValid(pos);
    const int32_t slot_offset = GetElementOffset(pos, sizeof(int16_t));
    return MemorySegmentUtils::GetValue<int16_t>(segments_, slot_offset);
}
float BinaryArray::GetFloat(int32_t pos) const {
AssertIndexIsValid(pos);
return MemorySegmentUtils::GetValue<float>(segments_, GetElementOffset(pos, sizeof(float)));
}
double BinaryArray::GetDouble(int32_t pos) const {
AssertIndexIsValid(pos);
return MemorySegmentUtils::GetValue<double>(segments_, GetElementOffset(pos, sizeof(double)));
}
// Returns true if any 32-bit word between the leading 4 bytes and the first element slot is
// non-zero, i.e. at least one bit in that region is set.
bool BinaryArray::AnyNull() const {
    int32_t word_offset = offset_ + 4;
    while (word_offset < element_offset_) {
        const auto word = MemorySegmentUtils::GetValue<int32_t>(segments_, word_offset);
        if (word != 0) {
            return true;
        }
        word_offset += 4;
    }
    return false;
}
// Copies the element region into a new std::vector<char>, one byte per element.
// CheckNoNull() is evaluated first; PAIMON_RETURN_NOT_OK presumably returns its non-OK status
// to the caller instead of copying (macro defined elsewhere - confirm).
Result<std::vector<char>> BinaryArray::ToBooleanArray() const {
    PAIMON_RETURN_NOT_OK(CheckNoNull());
    std::vector<char> values(size_);
    // `values` is a mutable local, so data() is already writable; no const_cast is required.
    MemorySegmentUtils::CopyToUnsafe(segments_, element_offset_, static_cast<void*>(values.data()),
                                     size_);
    return values;
}
// Copies the element region into a new std::vector<char>, one byte per element.
// CheckNoNull() is evaluated first; PAIMON_RETURN_NOT_OK presumably returns its non-OK status
// to the caller instead of copying (macro defined elsewhere - confirm).
Result<std::vector<char>> BinaryArray::ToByteArray() const {
    PAIMON_RETURN_NOT_OK(CheckNoNull());
    std::vector<char> values(size_);
    // `values` is a mutable local, so data() is already writable; no const_cast is required.
    MemorySegmentUtils::CopyToUnsafe(segments_, element_offset_, static_cast<void*>(values.data()),
                                     size_);
    return values;
}
// Copies the element region into a new std::vector<int16_t> (size_ * sizeof(int16_t) bytes).
// CheckNoNull() is evaluated first; PAIMON_RETURN_NOT_OK presumably returns its non-OK status
// to the caller instead of copying (macro defined elsewhere - confirm).
Result<std::vector<int16_t>> BinaryArray::ToShortArray() const {
    PAIMON_RETURN_NOT_OK(CheckNoNull());
    std::vector<int16_t> values(size_);
    // `values` is a mutable local, so data() is already writable; no const_cast is required.
    MemorySegmentUtils::CopyToUnsafe(segments_, element_offset_, static_cast<void*>(values.data()),
                                     size_ * sizeof(int16_t));
    return values;
}
// Copies the element region into a new std::vector<int32_t> (size_ * sizeof(int32_t) bytes).
// CheckNoNull() is evaluated first; PAIMON_RETURN_NOT_OK presumably returns its non-OK status
// to the caller instead of copying (macro defined elsewhere - confirm).
Result<std::vector<int32_t>> BinaryArray::ToIntArray() const {
    PAIMON_RETURN_NOT_OK(CheckNoNull());
    std::vector<int32_t> values(size_);
    // `values` is a mutable local, so data() is already writable; no const_cast is required.
    MemorySegmentUtils::CopyToUnsafe(segments_, element_offset_, static_cast<void*>(values.data()),
                                     size_ * sizeof(int32_t));
    return values;
}
// Copies the element region into a new std::vector<int64_t> (size_ * sizeof(int64_t) bytes).
// CheckNoNull() is evaluated first; PAIMON_RETURN_NOT_OK presumably returns its non-OK status
// to the caller instead of copying (macro defined elsewhere - confirm).
Result<std::vector<int64_t>> BinaryArray::ToLongArray() const {
    PAIMON_RETURN_NOT_OK(CheckNoNull());
    std::vector<int64_t> values(size_);
    // `values` is a mutable local, so data() is already writable; no const_cast is required.
    MemorySegmentUtils::CopyToUnsafe(segments_, element_offset_, static_cast<void*>(values.data()),
                                     size_ * sizeof(int64_t));
    return values;
}
// Copies the element region into a new std::vector<float> (size_ * sizeof(float) bytes).
// CheckNoNull() is evaluated first; PAIMON_RETURN_NOT_OK presumably returns its non-OK status
// to the caller instead of copying (macro defined elsewhere - confirm).
Result<std::vector<float>> BinaryArray::ToFloatArray() const {
    PAIMON_RETURN_NOT_OK(CheckNoNull());
    std::vector<float> values(size_);
    // `values` is a mutable local, so data() is already writable; no const_cast is required.
    MemorySegmentUtils::CopyToUnsafe(segments_, element_offset_, static_cast<void*>(values.data()),
                                     size_ * sizeof(float));
    return values;
}
// Copies the element region into a new std::vector<double> (size_ * sizeof(double) bytes).
// CheckNoNull() is evaluated first; PAIMON_RETURN_NOT_OK presumably returns its non-OK status
// to the caller instead of copying (macro defined elsewhere - confirm).
Result<std::vector<double>> BinaryArray::ToDoubleArray() const {
    PAIMON_RETURN_NOT_OK(CheckNoNull());
    std::vector<double> values(size_);
    // `values` is a mutable local, so data() is already writable; no const_cast is required.
    MemorySegmentUtils::CopyToUnsafe(segments_, element_offset_, static_cast<void*>(values.data()),
                                     size_ * sizeof(double));
    return values;
}
// Returns a new BinaryArray populated by the two-argument Copy overload, using `pool` for the
// copied bytes.
BinaryArray BinaryArray::Copy(MemoryPool* pool) const {
    BinaryArray duplicate;
    Copy(&duplicate, pool);
    return duplicate;
}
// Copies this array's size_in_bytes_ bytes (starting at offset_) into a buffer obtained through
// `pool`, then points `reuse` at that buffer with offset 0.
// `reuse` must not be null; this is checked with an assert in debug builds.
void BinaryArray::Copy(BinaryArray* reuse, MemoryPool* pool) const {
    assert(reuse);
    std::shared_ptr<Bytes> bytes =
        MemorySegmentUtils::CopyToBytes(segments_, offset_, size_in_bytes_, pool);
    // The copied region starts at the beginning of the new buffer.
    reuse->PointTo(MemorySegment::Wrap(bytes), 0, size_in_bytes_);
}
// Builds a BinaryArray from `arr` by writing every value through a BinaryArrayWriter configured
// with sizeof(int32_t)-byte elements and the given `pool`.
BinaryArray BinaryArray::FromIntArray(const std::vector<int32_t>& arr, MemoryPool* pool) {
    BinaryArray result;
    const size_t count = arr.size();
    BinaryArrayWriter writer = BinaryArrayWriter(&result, count, sizeof(int32_t), pool);
    for (size_t idx = 0; idx < count; ++idx) {
        writer.WriteInt(idx, arr[idx]);
    }
    writer.Complete();
    return result;
}
// Builds a BinaryArray from `arr` by writing every value through a BinaryArrayWriter configured
// with sizeof(int64_t)-byte elements and the given `pool`.
BinaryArray BinaryArray::FromLongArray(const std::vector<int64_t>& arr, MemoryPool* pool) {
    BinaryArray result;
    const size_t count = arr.size();
    BinaryArrayWriter writer = BinaryArrayWriter(&result, count, sizeof(int64_t), pool);
    for (size_t idx = 0; idx < count; ++idx) {
        writer.WriteLong(idx, arr[idx]);
    }
    writer.Complete();
    return result;
}
// Builds a BinaryArray of 8-byte elements from an arbitrary InternalArray.
// `arr` must not be null (asserted). If `arr` is itself a BinaryArray, a copy of that object is
// returned via its copy constructor and `pool` is not used. Otherwise every element is written
// through a BinaryArrayWriter: null elements via SetNullValue<int64_t>, the rest via WriteLong.
BinaryArray BinaryArray::FromLongArray(const InternalArray* arr, MemoryPool* pool) {
    assert(arr);
    if (const auto* binary_array = dynamic_cast<const BinaryArray*>(arr)) {
        return *binary_array;
    }
    // Size() is loop-invariant here; query it once rather than on every iteration.
    const auto num_elements = arr->Size();
    BinaryArray array;
    BinaryArrayWriter writer = BinaryArrayWriter(&array, num_elements, 8, pool);
    std::vector<bool> is_null(num_elements, false);
    // accessing the null bit first makes memory access more concentrated
    for (int32_t i = 0; i < num_elements; i++) {
        is_null[i] = arr->IsNullAt(i);
    }
    for (int32_t i = 0; i < num_elements; i++) {
        if (is_null[i]) {
            writer.SetNullValue<int64_t>(i);
        } else {
            writer.WriteLong(i, arr->GetLong(i));
        }
    }
    writer.Complete();
    return array;
}
} // namespace paimon