// blob: a5221bf99aaddfd019a7ee2e79a6ce15120d1c19 [file]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
#include "paimon/common/data/blob_descriptor.h"
#include <memory>
#include <vector>
#include "gtest/gtest.h"
#include "paimon/memory/memory_pool.h"
#include "paimon/status.h"
#include "paimon/testing/utils/testharness.h"
namespace paimon::test {
class BlobDescriptorTest : public testing::Test {
public:
void SetUp() override {
pool_ = GetDefaultPool();
ASSERT_OK_AND_ASSIGN(descriptor_,
BlobDescriptor::Create("test_uri", /*offset=*/1024, /*length=*/2048));
}
private:
std::shared_ptr<MemoryPool> pool_;
std::unique_ptr<BlobDescriptor> descriptor_;
};
// The fixture descriptor must report the exact values it was created with in SetUp().
TEST_F(BlobDescriptorTest, TestConstructorAndGetters) {
    auto& subject = *descriptor_;

    ASSERT_EQ(subject.Uri(), "test_uri");
    ASSERT_EQ(subject.Offset(), 1024);
    ASSERT_EQ(subject.Length(), 2048);
}
// Deserializes a version-1 payload (per the test name, one produced by the Java
// implementation) and checks every decoded field. The per-field annotations on
// the bytes mirror what the assertions below decode.
TEST_F(BlobDescriptorTest, TestDeserializeCompatibilityForJavaWithVersion1) {
    std::vector<char> java_payload = {
        1,                                      // version
        8,   0,   0,   0,                       // uri length = 8
        116, 101, 115, 116, 95, 117, 114, 105,  // "test_uri"
        0,   4,   0,   0,   0,  0,   0,   0,    // offset = 1024
        0,   8,   0,   0,   0,  0,   0,   0,    // length = 2048
    };

    ASSERT_OK_AND_ASSIGN(auto parsed,
                         BlobDescriptor::Deserialize(java_payload.data(), java_payload.size()));

    ASSERT_EQ(parsed->Version(), static_cast<int8_t>(1));
    ASSERT_EQ(parsed->Uri(), "test_uri");
    ASSERT_EQ(parsed->Offset(), 1024);
    ASSERT_EQ(parsed->Length(), 2048);
}
// Deserializes a version-2 payload (per the test name, one produced by the Java
// implementation), checks every decoded field, and verifies that serializing the
// result again reproduces the input byte for byte. The per-field annotations on
// the bytes mirror what the assertions below decode.
TEST_F(BlobDescriptorTest, TestDeserializeCompatibilityForJavaWithVersion2) {
    std::vector<char> java_payload = {
        2,                                      // version
        67,  83,  69,  68,  66, 79,  76,  66,   // magic
        8,   0,   0,   0,                       // uri length = 8
        116, 101, 115, 116, 95, 117, 114, 105,  // "test_uri"
        0,   4,   0,   0,   0,  0,   0,   0,    // offset = 1024
        0,   8,   0,   0,   0,  0,   0,   0,    // length = 2048
    };
    std::string java_serialized(java_payload.begin(), java_payload.end());

    ASSERT_OK_AND_ASSIGN(
        auto parsed, BlobDescriptor::Deserialize(java_serialized.data(), java_serialized.size()));

    ASSERT_EQ(parsed->Version(), static_cast<int8_t>(2));
    ASSERT_EQ(parsed->Uri(), "test_uri");
    ASSERT_EQ(parsed->Offset(), 1024);
    ASSERT_EQ(parsed->Length(), 2048);

    // Serializing from C++ must yield exactly the bytes we started from.
    PAIMON_UNIQUE_PTR<Bytes> reserialized = parsed->Serialize(pool_);
    std::string reserialized_string(reserialized->data(), reserialized->size());
    ASSERT_EQ(reserialized_string, java_serialized);
}
// A descriptor with an empty URI and zero offset/length must survive a
// serialize -> deserialize round trip unchanged.
TEST_F(BlobDescriptorTest, TestSerializeDeserializeWithEmptyUri) {
    ASSERT_OK_AND_ASSIGN(auto empty_uri_descriptor,
                         BlobDescriptor::Create(/*uri=*/"", /*offset=*/0, /*length=*/0));

    auto wire_bytes = empty_uri_descriptor->Serialize(pool_);
    ASSERT_OK_AND_ASSIGN(auto round_tripped,
                         BlobDescriptor::Deserialize(wire_bytes->data(), wire_bytes->size()));

    ASSERT_EQ(round_tripped->Uri(), "");
    ASSERT_EQ(round_tripped->Offset(), 0);
    ASSERT_EQ(round_tripped->Length(), 0);
}
// A descriptor created with length -1 must survive a serialize -> deserialize
// round trip with that length intact.
TEST_F(BlobDescriptorTest, TestSerializeDeserializeWithDynamicLength) {
    ASSERT_OK_AND_ASSIGN(auto dynamic_length_descriptor,
                         BlobDescriptor::Create("test_uri", /*offset=*/0, /*length=*/-1));

    auto wire_bytes = dynamic_length_descriptor->Serialize(pool_);
    ASSERT_OK_AND_ASSIGN(auto round_tripped,
                         BlobDescriptor::Deserialize(wire_bytes->data(), wire_bytes->size()));

    ASSERT_EQ(round_tripped->Uri(), "test_uri");
    ASSERT_EQ(round_tripped->Offset(), 0);
    ASSERT_EQ(round_tripped->Length(), -1);
}
// Exercises the rejection paths: an unsupported version byte and a truncated
// buffer on Deserialize(), and out-of-range offset/length on Create().
TEST_F(BlobDescriptorTest, TestInvalidParameters) {
    // Deserialize: version byte overwritten with 3.
    {
        ASSERT_OK_AND_ASSIGN(auto valid,
                             BlobDescriptor::Create(/*uri=*/"test", /*offset=*/1, /*length=*/2));
        auto wire_bytes = valid->Serialize(pool_);
        (*wire_bytes)[0] = '\x03';
        ASSERT_NOK_WITH_MSG(
            BlobDescriptor::Deserialize(wire_bytes->data(), wire_bytes->size()),
            "Expecting BlobDescriptor version to be less than or equal to 2, but found 3");
    }

    // Deserialize: only the first 5 bytes of a valid buffer are offered.
    {
        ASSERT_OK_AND_ASSIGN(auto valid,
                             BlobDescriptor::Create(/*uri=*/"test", /*offset=*/1, /*length=*/2));
        auto wire_bytes = valid->Serialize(pool_);
        ASSERT_NOK(BlobDescriptor::Deserialize(wire_bytes->data(), /*size=*/5));
    }

    // Create: negative offset.
    {
        ASSERT_NOK_WITH_MSG(BlobDescriptor::Create(/*uri=*/"test", /*offset=*/-1, /*length=*/2),
                            "offset -1 is less than 0");
    }

    // Create: length below -1.
    {
        ASSERT_NOK_WITH_MSG(BlobDescriptor::Create(/*uri=*/"test", /*offset=*/1, /*length=*/-2),
                            "length -2 is less than -1");
    }
}
// ToString() on the fixture descriptor must be non-empty and mention the
// version, uri, offset and length.
TEST_F(BlobDescriptorTest, TestToString) {
    std::string rendered = descriptor_->ToString();
    // True when `fragment` occurs somewhere in the rendered text.
    const auto mentions = [&rendered](const char* fragment) {
        return rendered.find(fragment) != std::string::npos;
    };

    ASSERT_FALSE(rendered.empty());
    ASSERT_TRUE(mentions("version=2"));
    ASSERT_TRUE(mentions("uri='test_uri'"));
    ASSERT_TRUE(mentions("offset=1024"));
    ASSERT_TRUE(mentions("length=2048"));
}
// Two consecutive serialize -> deserialize passes must produce identical bytes
// and leave the descriptor fields untouched.
TEST_F(BlobDescriptorTest, TestRoundTripConsistency) {
    // Pass 1: fixture descriptor -> bytes -> descriptor.
    auto bytes_pass1 = descriptor_->Serialize(pool_);
    ASSERT_OK_AND_ASSIGN(auto restored_pass1,
                         BlobDescriptor::Deserialize(bytes_pass1->data(), bytes_pass1->size()));

    // Pass 2: the restored descriptor must serialize to the very same bytes.
    auto bytes_pass2 = restored_pass1->Serialize(pool_);
    ASSERT_EQ(*bytes_pass1, *bytes_pass2);

    ASSERT_OK_AND_ASSIGN(auto restored_pass2,
                         BlobDescriptor::Deserialize(bytes_pass2->data(), bytes_pass2->size()));
    ASSERT_EQ(restored_pass2->Uri(), "test_uri");
    ASSERT_EQ(restored_pass2->Offset(), 1024);
    ASSERT_EQ(restored_pass2->Length(), 2048);
}
// Bytes produced by Serialize() on the fixture descriptor must be recognized.
TEST_F(BlobDescriptorTest, TestIsBlobDescriptorWithValidDescriptor) {
    auto wire_bytes = descriptor_->Serialize(pool_);

    ASSERT_OK_AND_ASSIGN(const bool recognized,
                         BlobDescriptor::IsBlobDescriptor(wire_bytes->data(), wire_bytes->size()));
    ASSERT_TRUE(recognized);
}
// Inputs shorter than 9 bytes are reported as "not a blob descriptor" rather
// than as an error.
TEST_F(BlobDescriptorTest, TestIsBlobDescriptorWithTooShortBuffer) {
    // 8 bytes: one byte short of the 9-byte minimum.
    std::vector<char> eight_bytes = {0x02, 0x43, 0x53, 0x45, 0x44, 0x42, 0x4F, 0x4C};
    ASSERT_OK_AND_ASSIGN(
        const bool short_recognized,
        BlobDescriptor::IsBlobDescriptor(eight_bytes.data(), eight_bytes.size()));
    ASSERT_FALSE(short_recognized);

    // No bytes at all.
    ASSERT_OK_AND_ASSIGN(const bool empty_recognized,
                         BlobDescriptor::IsBlobDescriptor(nullptr, 0));
    ASSERT_FALSE(empty_recognized);
}
// A version byte of 3 (above the current version) yields false, not an error.
TEST_F(BlobDescriptorTest, TestIsBlobDescriptorWithFutureVersion) {
    auto wire_bytes = descriptor_->Serialize(pool_);
    // Overwrite the leading version byte with 3.
    (*wire_bytes)[0] = '\x03';

    ASSERT_OK_AND_ASSIGN(const bool recognized,
                         BlobDescriptor::IsBlobDescriptor(wire_bytes->data(), wire_bytes->size()));
    ASSERT_FALSE(recognized);
}
// A buffer whose magic bytes have been damaged must not be recognized.
TEST_F(BlobDescriptorTest, TestIsBlobDescriptorWithWrongMagic) {
    auto wire_bytes = descriptor_->Serialize(pool_);
    // The magic occupies bytes 1-8; zero out the first two of them.
    (*wire_bytes)[1] = '\x00';
    (*wire_bytes)[2] = '\x00';

    ASSERT_OK_AND_ASSIGN(const bool recognized,
                         BlobDescriptor::IsBlobDescriptor(wire_bytes->data(), wire_bytes->size()));
    ASSERT_FALSE(recognized);
}
// Ten arbitrary bytes that do not follow the blob descriptor format must not
// be recognized.
TEST_F(BlobDescriptorTest, TestIsBlobDescriptorWithRandomData) {
    std::vector<char> unrelated_bytes = {0x00, 0x01, 0x02, 0x03, 0x04,
                                         0x05, 0x06, 0x07, 0x08, 0x09};

    ASSERT_OK_AND_ASSIGN(
        const bool recognized,
        BlobDescriptor::IsBlobDescriptor(unrelated_bytes.data(), unrelated_bytes.size()));
    ASSERT_FALSE(recognized);
}
// Version-1 data carries the uri length right after the version byte instead of
// the magic, so reading bytes 1-8 as the magic does not match and the result is
// false.
TEST_F(BlobDescriptorTest, TestIsBlobDescriptorWithVersion1Data) {
    std::vector<char> v1_payload = {
        1,                                      // version
        8,   0,   0,   0,                       // uri length = 8
        116, 101, 115, 116, 95, 117, 114, 105,  // "test_uri"
        0,   4,   0,   0,   0,  0,   0,   0,    // offset = 1024
        0,   8,   0,   0,   0,  0,   0,   0,    // length = 2048
    };

    ASSERT_OK_AND_ASSIGN(const bool recognized,
                         BlobDescriptor::IsBlobDescriptor(v1_payload.data(), v1_payload.size()));
    ASSERT_FALSE(recognized);
}
// The smallest recognizable input: one version byte plus the 8-byte magic.
TEST_F(BlobDescriptorTest, TestIsBlobDescriptorWithExactly9Bytes) {
    std::vector<char> header_only = {
        0x02,                                            // version = 2
        0x43, 0x53, 0x45, 0x44, 0x42, 0x4F, 0x4C, 0x42,  // magic 0x424C4F4244455343, little-endian
    };

    ASSERT_OK_AND_ASSIGN(const bool recognized,
                         BlobDescriptor::IsBlobDescriptor(header_only.data(), header_only.size()));
    ASSERT_TRUE(recognized);
}
} // namespace paimon::test