blob: 3028ec81f31415a4ebf7d5669e30d0b29223a998 [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
// Test for decimal128 alignment issue fix.
// Arrow decimal128 data may be 8-byte aligned but not 16-byte aligned.
// This test verifies that Gandiva handles such data correctly.
#include <gtest/gtest.h>
#include "arrow/array/array_decimal.h"
#include "arrow/array/builder_primitive.h"
#include "arrow/buffer.h"
#include "arrow/memory_pool.h"
#include "arrow/status.h"
#include "arrow/util/decimal.h"
#include "gandiva/decimal_type_util.h"
#include "gandiva/projector.h"
#include "gandiva/tests/test_util.h"
#include "gandiva/tree_expr_builder.h"
using arrow::Decimal128;
namespace gandiva {
class TestDecimalAlignment : public ::testing::Test {
public:
void SetUp() { pool_ = arrow::default_memory_pool(); }
protected:
arrow::MemoryPool* pool_;
};
// Create a decimal128 array with data at a specific alignment offset
// This simulates the real-world scenario where Arrow data from external sources
// (like JNI/Java) may not be 16-byte aligned.
std::shared_ptr<arrow::Array> MakeMisalignedDecimalArray(
const std::shared_ptr<arrow::Decimal128Type>& type,
const std::vector<Decimal128>& values, int alignment_offset) {
// Allocate buffer with extra space for misalignment
int64_t data_size = values.size() * 16; // 16 bytes per Decimal128
int64_t buffer_size = data_size + 16; // Extra space for offset
std::shared_ptr<arrow::Buffer> buffer;
ARROW_EXPECT_OK(arrow::AllocateBuffer(buffer_size).Value(&buffer));
// Calculate the starting offset to achieve desired alignment
// We want the data to be 8-byte aligned but NOT 16-byte aligned
uint8_t* raw_data = buffer->mutable_data();
uintptr_t addr = reinterpret_cast<uintptr_t>(raw_data);
// Find offset to get to 8-byte aligned but not 16-byte aligned address
int offset_to_8 = (8 - (addr % 8)) % 8;
int current_16_alignment = (addr + offset_to_8) % 16;
int final_offset;
if (alignment_offset == 8) {
// Want 8-byte aligned but NOT 16-byte aligned
if (current_16_alignment == 0) {
final_offset = offset_to_8 + 8; // Add 8 to break 16-byte alignment
} else {
final_offset = offset_to_8;
}
} else {
// Want 16-byte aligned
final_offset = (16 - (addr % 16)) % 16;
}
// Copy decimal values to the offset location
uint8_t* data_start = raw_data + final_offset;
for (size_t i = 0; i < values.size(); i++) {
memcpy(data_start + i * 16, values[i].ToBytes().data(), 16);
}
// Verify alignment
uintptr_t data_addr = reinterpret_cast<uintptr_t>(data_start);
EXPECT_EQ(data_addr % 8, 0) << "Data should be 8-byte aligned";
if (alignment_offset == 8) {
EXPECT_NE(data_addr % 16, 0) << "Data should NOT be 16-byte aligned";
}
// Create a sliced buffer starting at our offset
auto sliced_buffer = arrow::SliceBuffer(buffer, final_offset, data_size);
// Create validity buffer (all valid)
std::shared_ptr<arrow::Buffer> validity_buffer;
ARROW_EXPECT_OK(arrow::AllocateBuffer((values.size() + 7) / 8).Value(&validity_buffer));
memset(validity_buffer->mutable_data(), 0xFF, validity_buffer->size());
// Create the array with our misaligned data buffer
auto array_data = arrow::ArrayData::Make(type, static_cast<int64_t>(values.size()),
{validity_buffer, sliced_buffer});
return std::make_shared<arrow::Decimal128Array>(array_data);
}
// Test that decimal operations work correctly with 8-byte aligned (but not 16-byte
// aligned) data
TEST_F(TestDecimalAlignment, TestMisalignedDecimalSubtract) {
constexpr int32_t precision = 38;
constexpr int32_t scale = 17;
auto decimal_type = std::make_shared<arrow::Decimal128Type>(precision, scale);
auto field_a = arrow::field("a", decimal_type);
auto field_b = arrow::field("b", decimal_type);
auto schema = arrow::schema({field_a, field_b});
Decimal128TypePtr output_type;
auto status = DecimalTypeUtil::GetResultType(
DecimalTypeUtil::kOpSubtract, {decimal_type, decimal_type}, &output_type);
ASSERT_OK(status);
auto res = arrow::field("res", output_type);
auto node_a = TreeExprBuilder::MakeField(field_a);
auto node_b = TreeExprBuilder::MakeField(field_b);
auto subtract =
TreeExprBuilder::MakeFunction("subtract", {node_a, node_b}, output_type);
auto expr = TreeExprBuilder::MakeExpression(subtract, res);
std::shared_ptr<Projector> projector;
status = Projector::Make(schema, {expr}, TestConfiguration(), &projector);
ASSERT_OK(status);
// Create test data
std::vector<Decimal128> values_a = {Decimal128(100), Decimal128(200), Decimal128(300)};
std::vector<Decimal128> values_b = {Decimal128(10), Decimal128(20), Decimal128(30)};
// Create arrays with 8-byte alignment (but NOT 16-byte aligned)
auto array_a = MakeMisalignedDecimalArray(decimal_type, values_a, 8);
auto array_b = MakeMisalignedDecimalArray(decimal_type, values_b, 8);
auto in_batch = arrow::RecordBatch::Make(schema, 3, {array_a, array_b});
// This should NOT crash even with misaligned data
arrow::ArrayVector outputs;
status = projector->Evaluate(*in_batch, pool_, &outputs);
ASSERT_OK(status);
// Verify results: 100-10=90, 200-20=180, 300-30=270
auto result = std::dynamic_pointer_cast<arrow::Decimal128Array>(outputs[0]);
ASSERT_NE(result, nullptr);
EXPECT_EQ(result->length(), 3);
}
// Create a misaligned output buffer for decimal128
std::shared_ptr<arrow::ArrayData> MakeMisalignedDecimalOutput(
const std::shared_ptr<arrow::Decimal128Type>& type, int64_t num_records,
int alignment_offset) {
// Allocate data buffer with extra space for misalignment
int64_t data_size = num_records * 16; // 16 bytes per Decimal128
int64_t buffer_size = data_size + 16; // Extra space for offset
std::shared_ptr<arrow::Buffer> buffer;
ARROW_EXPECT_OK(arrow::AllocateBuffer(buffer_size).Value(&buffer));
uint8_t* raw_data = const_cast<uint8_t*>(buffer->data());
uintptr_t addr = reinterpret_cast<uintptr_t>(raw_data);
// Find offset to get to 8-byte aligned but not 16-byte aligned address
int offset_to_8 = (8 - (addr % 8)) % 8;
int current_16_alignment = (addr + offset_to_8) % 16;
int final_offset;
if (alignment_offset == 8) {
if (current_16_alignment == 0) {
final_offset = offset_to_8 + 8;
} else {
final_offset = offset_to_8;
}
} else {
final_offset = (16 - (addr % 16)) % 16;
}
// Verify alignment
uintptr_t data_addr = reinterpret_cast<uintptr_t>(raw_data + final_offset);
EXPECT_EQ(data_addr % 8, 0) << "Data should be 8-byte aligned";
if (alignment_offset == 8) {
EXPECT_NE(data_addr % 16, 0) << "Data should NOT be 16-byte aligned";
}
auto sliced_buffer = arrow::SliceBuffer(buffer, final_offset, data_size);
// Create validity buffer
int64_t bitmap_size = (num_records + 7) / 8;
std::shared_ptr<arrow::Buffer> validity_buffer;
ARROW_EXPECT_OK(arrow::AllocateBuffer(bitmap_size).Value(&validity_buffer));
memset(const_cast<uint8_t*>(validity_buffer->data()), 0xFF, validity_buffer->size());
return arrow::ArrayData::Make(type, num_records, {validity_buffer, sliced_buffer});
}
// Test that decimal STORES work correctly with 8-byte aligned (but not 16-byte aligned)
// output
TEST_F(TestDecimalAlignment, TestMisalignedDecimalStore) {
constexpr int32_t precision = 38;
constexpr int32_t scale = 17;
auto decimal_type = std::make_shared<arrow::Decimal128Type>(precision, scale);
auto field_a = arrow::field("a", decimal_type);
auto field_b = arrow::field("b", decimal_type);
auto schema = arrow::schema({field_a, field_b});
Decimal128TypePtr output_type;
auto status = DecimalTypeUtil::GetResultType(
DecimalTypeUtil::kOpSubtract, {decimal_type, decimal_type}, &output_type);
ASSERT_OK(status);
auto res = arrow::field("res", output_type);
auto node_a = TreeExprBuilder::MakeField(field_a);
auto node_b = TreeExprBuilder::MakeField(field_b);
auto subtract =
TreeExprBuilder::MakeFunction("subtract", {node_a, node_b}, output_type);
auto expr = TreeExprBuilder::MakeExpression(subtract, res);
std::shared_ptr<Projector> projector;
status = Projector::Make(schema, {expr}, TestConfiguration(), &projector);
ASSERT_OK(status);
// Create ALIGNED input arrays (using standard Arrow allocation)
auto array_a = MakeArrowArrayDecimal(
decimal_type, {Decimal128(100), Decimal128(200), Decimal128(300)},
{true, true, true});
auto array_b = MakeArrowArrayDecimal(
decimal_type, {Decimal128(10), Decimal128(20), Decimal128(30)}, {true, true, true});
auto in_batch = arrow::RecordBatch::Make(schema, 3, {array_a, array_b});
// Create MISALIGNED output buffer (8-byte aligned but NOT 16-byte aligned)
auto output_data = MakeMisalignedDecimalOutput(output_type, 3, 8);
// This should NOT crash even with misaligned output buffer
status = projector->Evaluate(*in_batch, {output_data});
ASSERT_OK(status);
// Verify the output was written correctly
auto result = std::make_shared<arrow::Decimal128Array>(output_data);
EXPECT_EQ(result->length(), 3);
}
} // namespace gandiva