blob: e47b248c362ba0fb083bb4b92dd27e70f0216dd6 [file]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
#pragma once
#include <cstdint>
#include <cstring>
#include <memory>
#include <vector>
#include "paimon/common/data/binary_array.h"
#include "paimon/common/data/binary_map.h"
#include "paimon/common/data/binary_row.h"
#include "paimon/common/data/binary_string.h"
#include "paimon/common/data/internal_map.h"
#include "paimon/data/decimal.h"
#include "paimon/data/timestamp.h"
#include "paimon/type_fwd.h"
namespace paimon {
/// Static helpers that decode typed values out of a `MemorySegment`.
///
/// Most readers take a packed 64-bit descriptor whose upper 32 bits are an offset
/// (added to `base_offset`) and whose lower 32 bits are a size or, for timestamps,
/// the nanosecond-of-millisecond component.
class BinaryDataReadUtils {
 public:
    BinaryDataReadUtils() = delete;
    ~BinaryDataReadUtils() = delete;

    /// Gets an instance of `Timestamp` from the underlying `MemorySegment`.
    /// @param segment the underlying `MemorySegment`
    /// @param base_offset the base offset of the current instance of TimestampData
    /// @param offset_and_nanos packed value: the upper 32 bits are the offset (relative to
    ///        `base_offset`) of the 64-bit millisecond part, the lower 32 bits are the
    ///        nanoseconds within that millisecond
    /// @return an instance of `Timestamp`
    static Timestamp ReadTimestampData(const MemorySegment& segment, int32_t base_offset,
                                       int64_t offset_and_nanos) {
        const int32_t nano_of_millisecond = LowerInt32(offset_and_nanos);
        const int32_t sub_offset = UpperInt32(offset_and_nanos);
        const auto millisecond =
            MemorySegmentUtils::GetValue<int64_t>({segment}, base_offset + sub_offset);
        return Timestamp::FromEpochMillis(millisecond, nano_of_millisecond);
    }

    /// Gets an instance of `Decimal` from the underlying `MemorySegment`.
    /// @param segment the underlying `MemorySegment`
    /// @param base_offset the base offset that the packed offset is added to
    /// @param offset_and_size packed value: the upper 32 bits are the offset of the unscaled
    ///        bytes (relative to `base_offset`), the lower 32 bits are their byte count
    /// @param precision the precision forwarded to `Decimal::FromUnscaledBytes`
    /// @param scale the scale forwarded to `Decimal::FromUnscaledBytes`
    /// @return the `Decimal` built from the copied unscaled bytes
    static Decimal ReadDecimal(const MemorySegment& segment, int32_t base_offset,
                               int64_t offset_and_size, int32_t precision, int32_t scale) {
        const int32_t size = LowerInt32(offset_and_size);
        const int32_t sub_offset = UpperInt32(offset_and_size);
        auto bytes = Bytes::AllocateBytes(size, GetDefaultPool().get());
        // Clear the whole buffer first, then copy `size` bytes from the segment into it.
        std::memset(bytes->data(), 0, bytes->size());
        MemorySegmentUtils::CopyToBytes<Bytes>({segment}, base_offset + sub_offset, bytes.get(), 0,
                                               size);
        return Decimal::FromUnscaledBytes(precision, scale, bytes.get());
    }

    /// Gets a `BinaryString`. If the length is less than 8, the string data is included in
    /// `variable_part_offset_and_len` itself instead of being referenced by offset.
    ///
    /// When the bit selected by `BinaryString::HIGHEST_FIRST_BIT` is clear, the upper 32 bits
    /// are an offset relative to `base_offset` and the lower 32 bits are the length. When it
    /// is set, the length is taken from the bits selected by
    /// `BinaryString::HIGHEST_SECOND_TO_EIGHTH_BIT` and the data is read at `field_offset`.
    /// @note Need to consider the ByteOrder.
    /// @param segment the underlying `MemorySegment`
    /// @param base_offset base offset of composite binary format.
    /// @param field_offset absolute start offset of variable_part_offset_and_len.
    /// @param variable_part_offset_and_len a long value, real data or offset and len.
    /// @return a `BinaryString` created via `BinaryString::FromAddress`
    static BinaryString ReadBinaryString(const MemorySegment& segment, int32_t base_offset,
                                         int64_t field_offset,
                                         int64_t variable_part_offset_and_len) {
        const int64_t mark = variable_part_offset_and_len & BinaryString::HIGHEST_FIRST_BIT;
        if (mark == 0) {
            // Offset-and-length form: the data is located relative to base_offset.
            const int32_t sub_offset = UpperInt32(variable_part_offset_and_len);
            const int32_t len = LowerInt32(variable_part_offset_and_len);
            return BinaryString::FromAddress(segment, base_offset + sub_offset, len);
        }
        // Inline form: the length is encoded in the top byte next to the mark bit.
        const auto len = static_cast<int32_t>(
            (static_cast<uint64_t>(variable_part_offset_and_len &
                                   BinaryString::HIGHEST_SECOND_TO_EIGHTH_BIT)) >>
            56);
        // On non-little-endian systems use field_offset + 1 to skip the header.
        const int64_t data_offset = (SystemByteOrder() == ByteOrder::PAIMON_LITTLE_ENDIAN)
                                        ? field_offset
                                        : field_offset + 1;
        return BinaryString::FromAddress(segment, data_offset, len);
    }

    /// Gets an instance of `InternalArray` from the underlying `MemorySegment`.
    /// @param segment the underlying `MemorySegment`
    /// @param base_offset the base offset that the packed offset is added to
    /// @param offset_and_size packed value: upper 32 bits are the offset, lower 32 bits the size
    /// @return a new `BinaryArray` pointed at the computed offset and size
    static std::shared_ptr<InternalArray> ReadArrayData(const MemorySegment& segment,
                                                        int32_t base_offset,
                                                        int64_t offset_and_size) {
        const int32_t size = LowerInt32(offset_and_size);
        const int32_t offset = UpperInt32(offset_and_size);
        auto binary_array = std::make_shared<BinaryArray>();
        binary_array->PointTo(segment, offset + base_offset, size);
        return binary_array;
    }

    /// Gets an instance of `InternalRow` from the underlying `MemorySegment`.
    /// @param segment the underlying `MemorySegment`
    /// @param num_fields the field count passed to the `BinaryRow` constructor
    /// @param base_offset the base offset that the packed offset is added to
    /// @param offset_and_size packed value: upper 32 bits are the offset, lower 32 bits the size
    /// @return a new `BinaryRow` pointed at the computed offset and size
    static std::shared_ptr<InternalRow> ReadRowData(const MemorySegment& segment,
                                                    int32_t num_fields, int32_t base_offset,
                                                    int64_t offset_and_size) {
        const int32_t size = LowerInt32(offset_and_size);
        const int32_t offset = UpperInt32(offset_and_size);
        auto row = std::make_shared<BinaryRow>(num_fields);
        row->PointTo(segment, offset + base_offset, size);
        return row;
    }

    /// Gets an instance of `InternalMap` from the underlying `MemorySegment`.
    /// @param segment the underlying `MemorySegment`
    /// @param base_offset the base offset that the packed offset is added to
    /// @param offset_and_size packed value: upper 32 bits are the offset, lower 32 bits the size
    /// @return a new `BinaryMap` pointed at the computed offset and size
    static std::shared_ptr<InternalMap> ReadMapData(const MemorySegment& segment,
                                                    int32_t base_offset, int64_t offset_and_size) {
        const int32_t size = LowerInt32(offset_and_size);
        const int32_t offset = UpperInt32(offset_and_size);
        auto binary_map = std::make_shared<BinaryMap>();
        binary_map->PointTo(segment, offset + base_offset, size);
        return binary_map;
    }

 private:
    /// Returns the lower 32 bits of `packed` (masked with `LOW_BYTES_MASK`) converted to
    /// `int32_t`.
    static int32_t LowerInt32(int64_t packed) {
        return static_cast<int32_t>(packed & LOW_BYTES_MASK);
    }

    /// Returns `packed` shifted right by 32 bits, converted to `int32_t`.
    static int32_t UpperInt32(int64_t packed) {
        return static_cast<int32_t>(packed >> 32);
    }

    static constexpr uint64_t LOW_BYTES_MASK = 0xFFFFFFFF;
};
} // namespace paimon