// cpp/src/gandiva/tests/decimal_alignment_test.cc - arrow - Git at Google

 // Licensed to the Apache Software Foundation (ASF) under one
 // or more contributor license agreements.  See the NOTICE file
 // distributed with this work for additional information
 // regarding copyright ownership.  The ASF licenses this file
 // to you under the Apache License, Version 2.0 (the
 // "License"); you may not use this file except in compliance
 // with the License.  You may obtain a copy of the License at
 //
 //   http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing,
 // software distributed under the License is distributed on an
 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 // KIND, either express or implied.  See the License for the
 // specific language governing permissions and limitations
 // under the License.

 // Test for decimal128 alignment issue fix.
 // Arrow decimal128 data may be 8-byte aligned but not 16-byte aligned.
 // This test verifies that Gandiva handles such data correctly.

 #include <gtest/gtest.h>

 #include "arrow/array/array_decimal.h"
 #include "arrow/array/builder_primitive.h"
 #include "arrow/buffer.h"
 #include "arrow/memory_pool.h"
 #include "arrow/status.h"
 #include "arrow/util/decimal.h"

 #include "gandiva/decimal_type_util.h"
 #include "gandiva/projector.h"
 #include "gandiva/tests/test_util.h"
 #include "gandiva/tree_expr_builder.h"

 using arrow::Decimal128;

 namespace gandiva {

 // Fixture for the decimal128 alignment tests. It holds the memory pool that tests
 // hand to Projector::Evaluate when Gandiva allocates the output arrays itself.
 class TestDecimalAlignment : public ::testing::Test {
  public:
   // Runs before each test: bind the fixture to Arrow's default memory pool.
   void SetUp() override { pool_ = arrow::default_memory_pool(); }

  protected:
   // Non-owning pointer; null until SetUp() has run.
   arrow::MemoryPool* pool_ = nullptr;
 };

 // Builds an all-valid Decimal128Array whose value buffer starts at a chosen alignment.
 //
 // This simulates the real-world scenario where Arrow data from external sources
 // (like JNI/Java) is 8-byte aligned but not 16-byte aligned.
 //
 // type:             decimal type attached to the resulting array.
 // values:           decimals copied into the value buffer, in order.
 // alignment_offset: 8 places the first value 8 bytes past a 16-byte boundary
 //                   (8-byte aligned, NOT 16-byte aligned); any other value places
 //                   it on a 16-byte boundary.
 //
 // Returns the array, or nullptr if a buffer allocation failed (the failure itself
 // is reported through ARROW_EXPECT_OK).
 std::shared_ptr<arrow::Array> MakeMisalignedDecimalArray(
     const std::shared_ptr<arrow::Decimal128Type>& type,
     const std::vector<Decimal128>& values, int alignment_offset) {
   constexpr int64_t kValueWidth = 16;  // bytes per Decimal128
   const auto num_values = static_cast<int64_t>(values.size());
   const int64_t data_size = num_values * kValueWidth;

   // Over-allocate by one value width so the payload can be shifted by up to 15 bytes.
   std::shared_ptr<arrow::Buffer> buffer;
   ARROW_EXPECT_OK(arrow::AllocateBuffer(data_size + kValueWidth).Value(&buffer));
   if (buffer == nullptr) {
     return nullptr;  // ARROW_EXPECT_OK is non-fatal; do not dereference a null buffer
   }

   uint8_t* raw_data = buffer->mutable_data();
   const auto addr = reinterpret_cast<uintptr_t>(raw_data);

   // Bytes needed to reach the next 16-byte boundary (0 when already on one).
   const auto to_boundary = static_cast<int64_t>((16 - (addr % 16)) % 16);
   // For the 8-byte case aim 8 bytes past a boundary, wrapping so the shift stays < 16.
   const int64_t final_offset =
       (alignment_offset == 8) ? (to_boundary + 8) % 16 : to_boundary;

   // Copy the decimal values byte-wise to the shifted location.
   uint8_t* data_start = raw_data + final_offset;
   for (size_t i = 0; i < values.size(); ++i) {
     const auto bytes = values[i].ToBytes();
     memcpy(data_start + i * bytes.size(), bytes.data(), bytes.size());
   }

   // Sanity-check that the payload really has the requested alignment.
   const auto data_addr = reinterpret_cast<uintptr_t>(data_start);
   EXPECT_EQ(data_addr % 8, 0u) << "Data should be 8-byte aligned";
   if (alignment_offset == 8) {
     EXPECT_NE(data_addr % 16, 0u) << "Data should NOT be 16-byte aligned";
   }

   // Expose only the shifted region so the array's value buffer begins at data_start.
   auto sliced_buffer = arrow::SliceBuffer(buffer, final_offset, data_size);

   // All-valid bitmap: one bit per value, rounded up to whole bytes.
   std::shared_ptr<arrow::Buffer> validity_buffer;
   ARROW_EXPECT_OK(arrow::AllocateBuffer((num_values + 7) / 8).Value(&validity_buffer));
   if (validity_buffer == nullptr) {
     return nullptr;
   }
   memset(validity_buffer->mutable_data(), 0xFF,
          static_cast<size_t>(validity_buffer->size()));

   // Assemble the array on top of the misaligned value buffer.
   auto array_data =
       arrow::ArrayData::Make(type, num_values, {validity_buffer, sliced_buffer});
   return std::make_shared<arrow::Decimal128Array>(array_data);
 }

 // Test that decimal operations work correctly with 8-byte aligned (but not 16-byte
 // aligned) input data: the projection must not crash and must produce the right
 // differences.
 TEST_F(TestDecimalAlignment, TestMisalignedDecimalSubtract) {
   constexpr int32_t precision = 38;
   constexpr int32_t scale = 17;
   auto decimal_type = std::make_shared<arrow::Decimal128Type>(precision, scale);
   auto field_a = arrow::field("a", decimal_type);
   auto field_b = arrow::field("b", decimal_type);
   auto schema = arrow::schema({field_a, field_b});

   // Let Gandiva derive the result type of (a - b).
   Decimal128TypePtr output_type;
   auto status = DecimalTypeUtil::GetResultType(
       DecimalTypeUtil::kOpSubtract, {decimal_type, decimal_type}, &output_type);
   ASSERT_OK(status);

   auto res = arrow::field("res", output_type);
   auto node_a = TreeExprBuilder::MakeField(field_a);
   auto node_b = TreeExprBuilder::MakeField(field_b);
   auto subtract =
       TreeExprBuilder::MakeFunction("subtract", {node_a, node_b}, output_type);
   auto expr = TreeExprBuilder::MakeExpression(subtract, res);

   std::shared_ptr<Projector> projector;
   status = Projector::Make(schema, {expr}, TestConfiguration(), &projector);
   ASSERT_OK(status);

   // Create test data
   std::vector<Decimal128> values_a = {Decimal128(100), Decimal128(200), Decimal128(300)};
   std::vector<Decimal128> values_b = {Decimal128(10), Decimal128(20), Decimal128(30)};

   // Create arrays with 8-byte alignment (but NOT 16-byte aligned)
   auto array_a = MakeMisalignedDecimalArray(decimal_type, values_a, 8);
   auto array_b = MakeMisalignedDecimalArray(decimal_type, values_b, 8);
   ASSERT_NE(array_a, nullptr);
   ASSERT_NE(array_b, nullptr);

   auto in_batch = arrow::RecordBatch::Make(schema, 3, {array_a, array_b});

   // This should NOT crash even with misaligned data
   arrow::ArrayVector outputs;
   status = projector->Evaluate(*in_batch, pool_, &outputs);
   ASSERT_OK(status);

   // Verify results: 100-10=90, 200-20=180, 300-30=270 (unscaled, at the input scale).
   ASSERT_EQ(outputs.size(), 1u);
   auto result = std::dynamic_pointer_cast<arrow::Decimal128Array>(outputs[0]);
   ASSERT_NE(result, nullptr);
   ASSERT_EQ(result->length(), 3);

   const std::vector<Decimal128> expected_diffs = {Decimal128(90), Decimal128(180),
                                                   Decimal128(270)};
   for (int64_t i = 0; i < result->length(); ++i) {
     // The derived result type may use a different scale than the inputs, so bring
     // the expected value to the output scale before comparing raw decimals.
     ASSERT_OK_AND_ASSIGN(Decimal128 expected,
                          expected_diffs[static_cast<size_t>(i)].Rescale(
                              scale, output_type->scale()));
     EXPECT_TRUE(result->IsValid(i)) << "row " << i;
     EXPECT_EQ(Decimal128(result->GetValue(i)), expected) << "row " << i;
   }
 }

 // Builds ArrayData for a decimal128 output column whose value buffer starts at a
 // chosen alignment, so a projector can be asked to store its results into it.
 //
 // type:             decimal type attached to the resulting ArrayData.
 // num_records:      number of decimal slots to reserve.
 // alignment_offset: 8 places the first slot 8 bytes past a 16-byte boundary
 //                   (8-byte aligned, NOT 16-byte aligned); any other value places
 //                   it on a 16-byte boundary.
 //
 // The value slots are left unwritten; the validity bitmap is filled with 1-bits.
 // Returns nullptr if a buffer allocation failed (the failure itself is reported
 // through ARROW_EXPECT_OK).
 std::shared_ptr<arrow::ArrayData> MakeMisalignedDecimalOutput(
     const std::shared_ptr<arrow::Decimal128Type>& type, int64_t num_records,
     int alignment_offset) {
   constexpr int64_t kValueWidth = 16;  // bytes per Decimal128
   const int64_t data_size = num_records * kValueWidth;

   // Over-allocate by one value width so the payload can be shifted by up to 15 bytes.
   std::shared_ptr<arrow::Buffer> buffer;
   ARROW_EXPECT_OK(arrow::AllocateBuffer(data_size + kValueWidth).Value(&buffer));
   if (buffer == nullptr) {
     return nullptr;  // ARROW_EXPECT_OK is non-fatal; do not dereference a null buffer
   }

   // The buffer was just allocated as mutable, so no const_cast is needed.
   uint8_t* raw_data = buffer->mutable_data();
   const auto addr = reinterpret_cast<uintptr_t>(raw_data);

   // Bytes needed to reach the next 16-byte boundary (0 when already on one).
   const auto to_boundary = static_cast<int64_t>((16 - (addr % 16)) % 16);
   // For the 8-byte case aim 8 bytes past a boundary, wrapping so the shift stays < 16.
   const int64_t final_offset =
       (alignment_offset == 8) ? (to_boundary + 8) % 16 : to_boundary;

   // Sanity-check that the payload really has the requested alignment.
   const auto data_addr = reinterpret_cast<uintptr_t>(raw_data + final_offset);
   EXPECT_EQ(data_addr % 8, 0u) << "Data should be 8-byte aligned";
   if (alignment_offset == 8) {
     EXPECT_NE(data_addr % 16, 0u) << "Data should NOT be 16-byte aligned";
   }

   // Expose only the shifted region so the output's value buffer begins there.
   auto sliced_buffer = arrow::SliceBuffer(buffer, final_offset, data_size);

   // Validity bitmap: one bit per record, rounded up to whole bytes, all bits set.
   const int64_t bitmap_size = (num_records + 7) / 8;
   std::shared_ptr<arrow::Buffer> validity_buffer;
   ARROW_EXPECT_OK(arrow::AllocateBuffer(bitmap_size).Value(&validity_buffer));
   if (validity_buffer == nullptr) {
     return nullptr;
   }
   memset(validity_buffer->mutable_data(), 0xFF,
          static_cast<size_t>(validity_buffer->size()));

   return arrow::ArrayData::Make(type, num_records, {validity_buffer, sliced_buffer});
 }

 // Test that decimal STORES work correctly with 8-byte aligned (but not 16-byte aligned)
 // output: the projection writes into a caller-supplied misaligned buffer, and the
 // values are read back and checked.
 TEST_F(TestDecimalAlignment, TestMisalignedDecimalStore) {
   constexpr int32_t precision = 38;
   constexpr int32_t scale = 17;
   auto decimal_type = std::make_shared<arrow::Decimal128Type>(precision, scale);
   auto field_a = arrow::field("a", decimal_type);
   auto field_b = arrow::field("b", decimal_type);
   auto schema = arrow::schema({field_a, field_b});

   // Let Gandiva derive the result type of (a - b).
   Decimal128TypePtr output_type;
   auto status = DecimalTypeUtil::GetResultType(
       DecimalTypeUtil::kOpSubtract, {decimal_type, decimal_type}, &output_type);
   ASSERT_OK(status);

   auto res = arrow::field("res", output_type);
   auto node_a = TreeExprBuilder::MakeField(field_a);
   auto node_b = TreeExprBuilder::MakeField(field_b);
   auto subtract =
       TreeExprBuilder::MakeFunction("subtract", {node_a, node_b}, output_type);
   auto expr = TreeExprBuilder::MakeExpression(subtract, res);

   std::shared_ptr<Projector> projector;
   status = Projector::Make(schema, {expr}, TestConfiguration(), &projector);
   ASSERT_OK(status);

   // Create ALIGNED input arrays (using standard Arrow allocation)
   auto array_a = MakeArrowArrayDecimal(
       decimal_type, {Decimal128(100), Decimal128(200), Decimal128(300)},
       {true, true, true});
   auto array_b = MakeArrowArrayDecimal(
       decimal_type, {Decimal128(10), Decimal128(20), Decimal128(30)}, {true, true, true});

   auto in_batch = arrow::RecordBatch::Make(schema, 3, {array_a, array_b});

   // Create MISALIGNED output buffer (8-byte aligned but NOT 16-byte aligned)
   auto output_data = MakeMisalignedDecimalOutput(output_type, 3, 8);
   ASSERT_NE(output_data, nullptr);

   // This should NOT crash even with misaligned output buffer
   status = projector->Evaluate(*in_batch, {output_data});
   ASSERT_OK(status);

   // Verify the output was written correctly: 100-10=90, 200-20=180, 300-30=270
   // (unscaled, at the input scale).
   auto result = std::make_shared<arrow::Decimal128Array>(output_data);
   ASSERT_EQ(result->length(), 3);

   const std::vector<Decimal128> expected_diffs = {Decimal128(90), Decimal128(180),
                                                   Decimal128(270)};
   for (int64_t i = 0; i < result->length(); ++i) {
     // The derived result type may use a different scale than the inputs, so bring
     // the expected value to the output scale before comparing raw decimals.
     ASSERT_OK_AND_ASSIGN(Decimal128 expected,
                          expected_diffs[static_cast<size_t>(i)].Rescale(
                              scale, output_type->scale()));
     EXPECT_TRUE(result->IsValid(i)) << "row " << i;
     EXPECT_EQ(Decimal128(result->GetValue(i)), expected) << "row " << i;
   }
 }

 }  // namespace gandiva
	// Licensed to the Apache Software Foundation (ASF) under one
	// or more contributor license agreements. See the NOTICE file
	// distributed with this work for additional information
	// regarding copyright ownership. The ASF licenses this file
	// to you under the Apache License, Version 2.0 (the
	// "License"); you may not use this file except in compliance
	// with the License. You may obtain a copy of the License at
	//
	// http://www.apache.org/licenses/LICENSE-2.0
	//
	// Unless required by applicable law or agreed to in writing,
	// software distributed under the License is distributed on an
	// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
	// KIND, either express or implied. See the License for the
	// specific language governing permissions and limitations
	// under the License.

	// Test for decimal128 alignment issue fix.
	// Arrow decimal128 data may be 8-byte aligned but not 16-byte aligned.
	// This test verifies that Gandiva handles such data correctly.

	#include <gtest/gtest.h>

	#include "arrow/array/array_decimal.h"
	#include "arrow/array/builder_primitive.h"
	#include "arrow/buffer.h"
	#include "arrow/memory_pool.h"
	#include "arrow/status.h"
	#include "arrow/util/decimal.h"

	#include "gandiva/decimal_type_util.h"
	#include "gandiva/projector.h"
	#include "gandiva/tests/test_util.h"
	#include "gandiva/tree_expr_builder.h"

	using arrow::Decimal128;

	namespace gandiva {

	// Fixture for the decimal128 alignment tests. It holds the memory pool that tests
	// hand to Projector::Evaluate when Gandiva allocates the output arrays itself.
	class TestDecimalAlignment : public ::testing::Test {
	 public:
	  // Runs before each test: bind the fixture to Arrow's default memory pool.
	  void SetUp() override { pool_ = arrow::default_memory_pool(); }

	 protected:
	  // Non-owning pointer; null until SetUp() has run.
	  arrow::MemoryPool* pool_ = nullptr;
	};

	// Builds an all-valid Decimal128Array whose value buffer starts at a chosen alignment.
	//
	// This simulates the real-world scenario where Arrow data from external sources
	// (like JNI/Java) is 8-byte aligned but not 16-byte aligned.
	//
	// type:             decimal type attached to the resulting array.
	// values:           decimals copied into the value buffer, in order.
	// alignment_offset: 8 places the first value 8 bytes past a 16-byte boundary
	//                   (8-byte aligned, NOT 16-byte aligned); any other value places
	//                   it on a 16-byte boundary.
	//
	// Returns the array, or nullptr if a buffer allocation failed (the failure itself
	// is reported through ARROW_EXPECT_OK).
	std::shared_ptr<arrow::Array> MakeMisalignedDecimalArray(
	    const std::shared_ptr<arrow::Decimal128Type>& type,
	    const std::vector<Decimal128>& values, int alignment_offset) {
	  constexpr int64_t kValueWidth = 16;  // bytes per Decimal128
	  const auto num_values = static_cast<int64_t>(values.size());
	  const int64_t data_size = num_values * kValueWidth;

	  // Over-allocate by one value width so the payload can be shifted by up to 15 bytes.
	  std::shared_ptr<arrow::Buffer> buffer;
	  ARROW_EXPECT_OK(arrow::AllocateBuffer(data_size + kValueWidth).Value(&buffer));
	  if (buffer == nullptr) {
	    return nullptr;  // ARROW_EXPECT_OK is non-fatal; do not dereference a null buffer
	  }

	  uint8_t* raw_data = buffer->mutable_data();
	  const auto addr = reinterpret_cast<uintptr_t>(raw_data);

	  // Bytes needed to reach the next 16-byte boundary (0 when already on one).
	  const auto to_boundary = static_cast<int64_t>((16 - (addr % 16)) % 16);
	  // For the 8-byte case aim 8 bytes past a boundary, wrapping so the shift stays < 16.
	  const int64_t final_offset =
	      (alignment_offset == 8) ? (to_boundary + 8) % 16 : to_boundary;

	  // Copy the decimal values byte-wise to the shifted location.
	  uint8_t* data_start = raw_data + final_offset;
	  for (size_t i = 0; i < values.size(); ++i) {
	    const auto bytes = values[i].ToBytes();
	    memcpy(data_start + i * bytes.size(), bytes.data(), bytes.size());
	  }

	  // Sanity-check that the payload really has the requested alignment.
	  const auto data_addr = reinterpret_cast<uintptr_t>(data_start);
	  EXPECT_EQ(data_addr % 8, 0u) << "Data should be 8-byte aligned";
	  if (alignment_offset == 8) {
	    EXPECT_NE(data_addr % 16, 0u) << "Data should NOT be 16-byte aligned";
	  }

	  // Expose only the shifted region so the array's value buffer begins at data_start.
	  auto sliced_buffer = arrow::SliceBuffer(buffer, final_offset, data_size);

	  // All-valid bitmap: one bit per value, rounded up to whole bytes.
	  std::shared_ptr<arrow::Buffer> validity_buffer;
	  ARROW_EXPECT_OK(arrow::AllocateBuffer((num_values + 7) / 8).Value(&validity_buffer));
	  if (validity_buffer == nullptr) {
	    return nullptr;
	  }
	  memset(validity_buffer->mutable_data(), 0xFF,
	         static_cast<size_t>(validity_buffer->size()));

	  // Assemble the array on top of the misaligned value buffer.
	  auto array_data =
	      arrow::ArrayData::Make(type, num_values, {validity_buffer, sliced_buffer});
	  return std::make_shared<arrow::Decimal128Array>(array_data);
	}

	// Test that decimal operations work correctly with 8-byte aligned (but not 16-byte
	// aligned) input data: the projection must not crash and must produce the right
	// differences.
	TEST_F(TestDecimalAlignment, TestMisalignedDecimalSubtract) {
	  constexpr int32_t precision = 38;
	  constexpr int32_t scale = 17;
	  auto decimal_type = std::make_shared<arrow::Decimal128Type>(precision, scale);
	  auto field_a = arrow::field("a", decimal_type);
	  auto field_b = arrow::field("b", decimal_type);
	  auto schema = arrow::schema({field_a, field_b});

	  // Let Gandiva derive the result type of (a - b).
	  Decimal128TypePtr output_type;
	  auto status = DecimalTypeUtil::GetResultType(
	      DecimalTypeUtil::kOpSubtract, {decimal_type, decimal_type}, &output_type);
	  ASSERT_OK(status);

	  auto res = arrow::field("res", output_type);
	  auto node_a = TreeExprBuilder::MakeField(field_a);
	  auto node_b = TreeExprBuilder::MakeField(field_b);
	  auto subtract =
	      TreeExprBuilder::MakeFunction("subtract", {node_a, node_b}, output_type);
	  auto expr = TreeExprBuilder::MakeExpression(subtract, res);

	  std::shared_ptr<Projector> projector;
	  status = Projector::Make(schema, {expr}, TestConfiguration(), &projector);
	  ASSERT_OK(status);

	  // Create test data
	  std::vector<Decimal128> values_a = {Decimal128(100), Decimal128(200), Decimal128(300)};
	  std::vector<Decimal128> values_b = {Decimal128(10), Decimal128(20), Decimal128(30)};

	  // Create arrays with 8-byte alignment (but NOT 16-byte aligned)
	  auto array_a = MakeMisalignedDecimalArray(decimal_type, values_a, 8);
	  auto array_b = MakeMisalignedDecimalArray(decimal_type, values_b, 8);
	  ASSERT_NE(array_a, nullptr);
	  ASSERT_NE(array_b, nullptr);

	  auto in_batch = arrow::RecordBatch::Make(schema, 3, {array_a, array_b});

	  // This should NOT crash even with misaligned data
	  arrow::ArrayVector outputs;
	  status = projector->Evaluate(*in_batch, pool_, &outputs);
	  ASSERT_OK(status);

	  // Verify results: 100-10=90, 200-20=180, 300-30=270 (unscaled, at the input scale).
	  ASSERT_EQ(outputs.size(), 1u);
	  auto result = std::dynamic_pointer_cast<arrow::Decimal128Array>(outputs[0]);
	  ASSERT_NE(result, nullptr);
	  ASSERT_EQ(result->length(), 3);

	  const std::vector<Decimal128> expected_diffs = {Decimal128(90), Decimal128(180),
	                                                  Decimal128(270)};
	  for (int64_t i = 0; i < result->length(); ++i) {
	    // The derived result type may use a different scale than the inputs, so bring
	    // the expected value to the output scale before comparing raw decimals.
	    ASSERT_OK_AND_ASSIGN(Decimal128 expected,
	                         expected_diffs[static_cast<size_t>(i)].Rescale(
	                             scale, output_type->scale()));
	    EXPECT_TRUE(result->IsValid(i)) << "row " << i;
	    EXPECT_EQ(Decimal128(result->GetValue(i)), expected) << "row " << i;
	  }
	}

	// Builds ArrayData for a decimal128 output column whose value buffer starts at a
	// chosen alignment, so a projector can be asked to store its results into it.
	//
	// type:             decimal type attached to the resulting ArrayData.
	// num_records:      number of decimal slots to reserve.
	// alignment_offset: 8 places the first slot 8 bytes past a 16-byte boundary
	//                   (8-byte aligned, NOT 16-byte aligned); any other value places
	//                   it on a 16-byte boundary.
	//
	// The value slots are left unwritten; the validity bitmap is filled with 1-bits.
	// Returns nullptr if a buffer allocation failed (the failure itself is reported
	// through ARROW_EXPECT_OK).
	std::shared_ptr<arrow::ArrayData> MakeMisalignedDecimalOutput(
	    const std::shared_ptr<arrow::Decimal128Type>& type, int64_t num_records,
	    int alignment_offset) {
	  constexpr int64_t kValueWidth = 16;  // bytes per Decimal128
	  const int64_t data_size = num_records * kValueWidth;

	  // Over-allocate by one value width so the payload can be shifted by up to 15 bytes.
	  std::shared_ptr<arrow::Buffer> buffer;
	  ARROW_EXPECT_OK(arrow::AllocateBuffer(data_size + kValueWidth).Value(&buffer));
	  if (buffer == nullptr) {
	    return nullptr;  // ARROW_EXPECT_OK is non-fatal; do not dereference a null buffer
	  }

	  // The buffer was just allocated as mutable, so no const_cast is needed.
	  uint8_t* raw_data = buffer->mutable_data();
	  const auto addr = reinterpret_cast<uintptr_t>(raw_data);

	  // Bytes needed to reach the next 16-byte boundary (0 when already on one).
	  const auto to_boundary = static_cast<int64_t>((16 - (addr % 16)) % 16);
	  // For the 8-byte case aim 8 bytes past a boundary, wrapping so the shift stays < 16.
	  const int64_t final_offset =
	      (alignment_offset == 8) ? (to_boundary + 8) % 16 : to_boundary;

	  // Sanity-check that the payload really has the requested alignment.
	  const auto data_addr = reinterpret_cast<uintptr_t>(raw_data + final_offset);
	  EXPECT_EQ(data_addr % 8, 0u) << "Data should be 8-byte aligned";
	  if (alignment_offset == 8) {
	    EXPECT_NE(data_addr % 16, 0u) << "Data should NOT be 16-byte aligned";
	  }

	  // Expose only the shifted region so the output's value buffer begins there.
	  auto sliced_buffer = arrow::SliceBuffer(buffer, final_offset, data_size);

	  // Validity bitmap: one bit per record, rounded up to whole bytes, all bits set.
	  const int64_t bitmap_size = (num_records + 7) / 8;
	  std::shared_ptr<arrow::Buffer> validity_buffer;
	  ARROW_EXPECT_OK(arrow::AllocateBuffer(bitmap_size).Value(&validity_buffer));
	  if (validity_buffer == nullptr) {
	    return nullptr;
	  }
	  memset(validity_buffer->mutable_data(), 0xFF,
	         static_cast<size_t>(validity_buffer->size()));

	  return arrow::ArrayData::Make(type, num_records, {validity_buffer, sliced_buffer});
	}

	// Test that decimal STORES work correctly with 8-byte aligned (but not 16-byte aligned)
	// output: the projection writes into a caller-supplied misaligned buffer, and the
	// values are read back and checked.
	TEST_F(TestDecimalAlignment, TestMisalignedDecimalStore) {
	  constexpr int32_t precision = 38;
	  constexpr int32_t scale = 17;
	  auto decimal_type = std::make_shared<arrow::Decimal128Type>(precision, scale);
	  auto field_a = arrow::field("a", decimal_type);
	  auto field_b = arrow::field("b", decimal_type);
	  auto schema = arrow::schema({field_a, field_b});

	  // Let Gandiva derive the result type of (a - b).
	  Decimal128TypePtr output_type;
	  auto status = DecimalTypeUtil::GetResultType(
	      DecimalTypeUtil::kOpSubtract, {decimal_type, decimal_type}, &output_type);
	  ASSERT_OK(status);

	  auto res = arrow::field("res", output_type);
	  auto node_a = TreeExprBuilder::MakeField(field_a);
	  auto node_b = TreeExprBuilder::MakeField(field_b);
	  auto subtract =
	      TreeExprBuilder::MakeFunction("subtract", {node_a, node_b}, output_type);
	  auto expr = TreeExprBuilder::MakeExpression(subtract, res);

	  std::shared_ptr<Projector> projector;
	  status = Projector::Make(schema, {expr}, TestConfiguration(), &projector);
	  ASSERT_OK(status);

	  // Create ALIGNED input arrays (using standard Arrow allocation)
	  auto array_a = MakeArrowArrayDecimal(
	      decimal_type, {Decimal128(100), Decimal128(200), Decimal128(300)},
	      {true, true, true});
	  auto array_b = MakeArrowArrayDecimal(
	      decimal_type, {Decimal128(10), Decimal128(20), Decimal128(30)}, {true, true, true});

	  auto in_batch = arrow::RecordBatch::Make(schema, 3, {array_a, array_b});

	  // Create MISALIGNED output buffer (8-byte aligned but NOT 16-byte aligned)
	  auto output_data = MakeMisalignedDecimalOutput(output_type, 3, 8);
	  ASSERT_NE(output_data, nullptr);

	  // This should NOT crash even with misaligned output buffer
	  status = projector->Evaluate(*in_batch, {output_data});
	  ASSERT_OK(status);

	  // Verify the output was written correctly: 100-10=90, 200-20=180, 300-30=270
	  // (unscaled, at the input scale).
	  auto result = std::make_shared<arrow::Decimal128Array>(output_data);
	  ASSERT_EQ(result->length(), 3);

	  const std::vector<Decimal128> expected_diffs = {Decimal128(90), Decimal128(180),
	                                                  Decimal128(270)};
	  for (int64_t i = 0; i < result->length(); ++i) {
	    // The derived result type may use a different scale than the inputs, so bring
	    // the expected value to the output scale before comparing raw decimals.
	    ASSERT_OK_AND_ASSIGN(Decimal128 expected,
	                         expected_diffs[static_cast<size_t>(i)].Rescale(
	                             scale, output_type->scale()));
	    EXPECT_TRUE(result->IsValid(i)) << "row " << i;
	    EXPECT_EQ(Decimal128(result->GetValue(i)), expected) << "row " << i;
	  }
	}

	} // namespace gandiva