feat: add Decimal32/Decimal64 support (#683)
Initial implementation of Decimal32/Decimal64 support in nanoarrow.
diff --git a/.github/workflows/build-and-test-device.yaml b/.github/workflows/build-and-test-device.yaml
index 3304d8f..7fd2c38 100644
--- a/.github/workflows/build-and-test-device.yaml
+++ b/.github/workflows/build-and-test-device.yaml
@@ -85,7 +85,7 @@
if: steps.cache-arrow-build.outputs.cache-hit != 'true'
shell: bash
run: |
- ci/scripts/build-arrow-cpp-minimal.sh 15.0.2 arrow
+ ci/scripts/build-arrow-cpp-minimal.sh 18.0.0 arrow
- name: Build
run: |
diff --git a/.github/workflows/build-and-test-ipc.yaml b/.github/workflows/build-and-test-ipc.yaml
index cd28908..6defa4b 100644
--- a/.github/workflows/build-and-test-ipc.yaml
+++ b/.github/workflows/build-and-test-ipc.yaml
@@ -78,7 +78,7 @@
if: steps.cache-arrow-build.outputs.cache-hit != 'true'
shell: bash
run: |
- ci/scripts/build-arrow-cpp-minimal.sh 15.0.2 arrow
+ ci/scripts/build-arrow-cpp-minimal.sh 18.0.0 arrow
- name: Build
run: |
diff --git a/.github/workflows/build-and-test.yaml b/.github/workflows/build-and-test.yaml
index 9432c35..ab9b8e8 100644
--- a/.github/workflows/build-and-test.yaml
+++ b/.github/workflows/build-and-test.yaml
@@ -70,7 +70,7 @@
if: steps.cache-arrow-build.outputs.cache-hit != 'true'
shell: bash
run: |
- ci/scripts/build-arrow-cpp-minimal.sh 15.0.2 arrow
+ ci/scripts/build-arrow-cpp-minimal.sh 18.0.0 arrow
- name: Build nanoarrow
run: |
@@ -154,7 +154,7 @@
if: steps.cache-arrow-build.outputs.cache-hit != 'true'
shell: bash
run: |
- ci/scripts/build-arrow-cpp-minimal.sh 16.0.0 arrow
+ ci/scripts/build-arrow-cpp-minimal.sh 18.0.0 arrow
- name: Run meson testing script
run: |
diff --git a/.github/workflows/clang-tidy.yaml b/.github/workflows/clang-tidy.yaml
index f7fbdfb..a5e4d1a 100644
--- a/.github/workflows/clang-tidy.yaml
+++ b/.github/workflows/clang-tidy.yaml
@@ -54,7 +54,7 @@
if: steps.cache-arrow-build.outputs.cache-hit != 'true'
shell: bash
run: |
- ci/scripts/build-arrow-cpp-minimal.sh 15.0.2 arrow
+ ci/scripts/build-arrow-cpp-minimal.sh 18.0.0 arrow
- name: Build nanoarrow
run: |
diff --git a/ci/docker/alpine.dockerfile b/ci/docker/alpine.dockerfile
index 1c9d513..9aeb0aa 100644
--- a/ci/docker/alpine.dockerfile
+++ b/ci/docker/alpine.dockerfile
@@ -23,7 +23,7 @@
# For Arrow C++
COPY ci/scripts/build-arrow-cpp-minimal.sh /
-RUN /build-arrow-cpp-minimal.sh 15.0.2 /arrow
+RUN /build-arrow-cpp-minimal.sh 18.0.0 /arrow
# There's a missing define that numpy's build needs on s390x and there is no wheel
RUN (grep -e "S390" /usr/include/bits/hwcap.h && echo "#define HWCAP_S390_VX HWCAP_S390_VXRS" >> /usr/include/bits/hwcap.h) || true
diff --git a/src/nanoarrow/common/array.c b/src/nanoarrow/common/array.c
index 3d04d0b..53cd4c6 100644
--- a/src/nanoarrow/common/array.c
+++ b/src/nanoarrow/common/array.c
@@ -104,6 +104,8 @@
case NANOARROW_TYPE_HALF_FLOAT:
case NANOARROW_TYPE_FLOAT:
case NANOARROW_TYPE_DOUBLE:
+ case NANOARROW_TYPE_DECIMAL32:
+ case NANOARROW_TYPE_DECIMAL64:
case NANOARROW_TYPE_DECIMAL128:
case NANOARROW_TYPE_DECIMAL256:
case NANOARROW_TYPE_INTERVAL_MONTHS:
diff --git a/src/nanoarrow/common/array_test.cc b/src/nanoarrow/common/array_test.cc
index 04d3b1d..dddc779 100644
--- a/src/nanoarrow/common/array_test.cc
+++ b/src/nanoarrow/common/array_test.cc
@@ -1245,6 +1245,94 @@
#endif
}
+// Appends [12345, null, null, -67890] (unscaled values) to a decimal32 array, checks
+// the validity and data buffers directly and, when built against Arrow >= 18,
+// compares the result with an array produced by Arrow's Decimal32Builder.
+TEST(ArrayTest, ArrayTestAppendToDecimal32Array) {
+ struct ArrowArray array;
+ struct ArrowDecimal decimal;
+
+ ArrowDecimalInit(&decimal, 32, 8, 3);
+ ASSERT_EQ(ArrowArrayInitFromType(&array, NANOARROW_TYPE_DECIMAL32), NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayStartAppending(&array), NANOARROW_OK);
+
+ ArrowDecimalSetInt(&decimal, 12345);
+ EXPECT_EQ(ArrowArrayAppendDecimal(&array, &decimal), NANOARROW_OK);
+
+ EXPECT_EQ(ArrowArrayAppendNull(&array, 2), NANOARROW_OK);
+
+ ArrowDecimalSetInt(&decimal, -67890);
+ EXPECT_EQ(ArrowArrayAppendDecimal(&array, &decimal), NANOARROW_OK);
+
+ EXPECT_EQ(ArrowArrayFinishBuildingDefault(&array, nullptr), NANOARROW_OK);
+ EXPECT_EQ(array.length, 4);
+ EXPECT_EQ(array.null_count, 2);
+ auto validity_buffer = reinterpret_cast<const uint8_t*>(array.buffers[0]);
+ auto data_buffer = reinterpret_cast<const uint8_t*>(array.buffers[1]);
+ // Bits 0 and 3 are set: the first and last elements are the non-null ones
+ EXPECT_EQ(validity_buffer[0], 0b00001001);
+
+ // Each element occupies 4 bytes, so the last element starts at byte offset 3 * 4
+ ArrowDecimalSetInt(&decimal, 12345);
+ EXPECT_EQ(memcmp(data_buffer, decimal.words, 4), 0);
+ ArrowDecimalSetInt(&decimal, -67890);
+ EXPECT_EQ(memcmp(data_buffer + 3 * 4, decimal.words, 4), 0);
+
+#if defined(NANOARROW_BUILD_TESTS_WITH_ARROW) && ARROW_VERSION_MAJOR >= 18
+ // NOTE(review): presumably ImportArray() takes ownership of array, which is why
+ // only the #else branch releases it -- confirm against the Arrow C bridge.
+ auto arrow_array = ImportArray(&array, decimal32(8, 3));
+ ARROW_EXPECT_OK(arrow_array);
+
+ // The same values written as decimal strings at scale 3
+ auto builder = Decimal32Builder(decimal32(8, 3));
+ ARROW_EXPECT_OK(builder.Append(*Decimal32::FromString("12.345")));
+ ARROW_EXPECT_OK(builder.AppendNulls(2));
+ ARROW_EXPECT_OK(builder.Append(*Decimal32::FromString("-67.890")));
+ auto expected_array = builder.Finish();
+
+ EXPECT_TRUE(arrow_array.ValueUnsafe()->Equals(expected_array.ValueUnsafe()));
+#else
+ ArrowArrayRelease(&array);
+#endif
+}
+
+// Appends [12345, null, null, -67890] (unscaled values) to a decimal64 array, checks
+// the validity and data buffers directly and, when built against Arrow >= 18,
+// compares the result with an array produced by Arrow's Decimal64Builder.
+TEST(ArrayTest, ArrayTestAppendToDecimal64Array) {
+ struct ArrowArray array;
+ struct ArrowDecimal decimal;
+
+ ArrowDecimalInit(&decimal, 64, 10, 3);
+ ASSERT_EQ(ArrowArrayInitFromType(&array, NANOARROW_TYPE_DECIMAL64), NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayStartAppending(&array), NANOARROW_OK);
+
+ ArrowDecimalSetInt(&decimal, 12345);
+ EXPECT_EQ(ArrowArrayAppendDecimal(&array, &decimal), NANOARROW_OK);
+
+ EXPECT_EQ(ArrowArrayAppendNull(&array, 2), NANOARROW_OK);
+
+ ArrowDecimalSetInt(&decimal, -67890);
+ EXPECT_EQ(ArrowArrayAppendDecimal(&array, &decimal), NANOARROW_OK);
+
+ EXPECT_EQ(ArrowArrayFinishBuildingDefault(&array, nullptr), NANOARROW_OK);
+ EXPECT_EQ(array.length, 4);
+ EXPECT_EQ(array.null_count, 2);
+ auto validity_buffer = reinterpret_cast<const uint8_t*>(array.buffers[0]);
+ auto data_buffer = reinterpret_cast<const uint8_t*>(array.buffers[1]);
+ // Bits 0 and 3 are set: the first and last elements are the non-null ones
+ EXPECT_EQ(validity_buffer[0], 0b00001001);
+
+ // Each element occupies 8 bytes, so the last element starts at byte offset 3 * 8
+ ArrowDecimalSetInt(&decimal, 12345);
+ EXPECT_EQ(memcmp(data_buffer, decimal.words, 8), 0);
+ ArrowDecimalSetInt(&decimal, -67890);
+ EXPECT_EQ(memcmp(data_buffer + 3 * 8, decimal.words, 8), 0);
+
+#if defined(NANOARROW_BUILD_TESTS_WITH_ARROW) && ARROW_VERSION_MAJOR >= 18
+ // NOTE(review): presumably ImportArray() takes ownership of array, which is why
+ // only the #else branch releases it -- confirm against the Arrow C bridge.
+ auto arrow_array = ImportArray(&array, decimal64(10, 3));
+ ARROW_EXPECT_OK(arrow_array);
+
+ // The same values written as decimal strings at scale 3
+ auto builder = Decimal64Builder(decimal64(10, 3));
+ ARROW_EXPECT_OK(builder.Append(*Decimal64::FromString("12.345")));
+ ARROW_EXPECT_OK(builder.AppendNulls(2));
+ ARROW_EXPECT_OK(builder.Append(*Decimal64::FromString("-67.890")));
+ auto expected_array = builder.Finish();
+
+ EXPECT_TRUE(arrow_array.ValueUnsafe()->Equals(expected_array.ValueUnsafe()));
+#else
+ ArrowArrayRelease(&array);
+#endif
+}
+
TEST(ArrayTest, ArrayTestAppendToDecimal128Array) {
struct ArrowArray array;
struct ArrowDecimal decimal;
@@ -3821,6 +3909,82 @@
ArrowArrayRelease(&array);
}
+#if ARROW_VERSION_MAJOR >= 18
+// Builds a decimal32(8, 3) array [1.234, null, null, -5.678] with Arrow, exports it
+// with ExportArray() and reads the two non-null elements back through an
+// ArrowArrayView.
+TEST(ArrayViewTest, ArrayViewTestGetDecimal32) {
+ struct ArrowArray array;
+ struct ArrowSchema schema;
+ struct ArrowArrayView array_view;
+ struct ArrowError error;
+
+ auto type = decimal32(8, 3);
+
+ // Array with nulls
+ auto builder = Decimal32Builder(type);
+ ARROW_EXPECT_OK(builder.Append(*Decimal32::FromReal(1.234, 8, 3)));
+ ARROW_EXPECT_OK(builder.AppendNulls(2));
+ ARROW_EXPECT_OK(builder.Append(*Decimal32::FromReal(-5.678, 8, 3)));
+ auto maybe_arrow_array = builder.Finish();
+ ARROW_EXPECT_OK(maybe_arrow_array);
+ auto arrow_array = maybe_arrow_array.ValueUnsafe();
+
+ ARROW_EXPECT_OK(ExportArray(*arrow_array, &array, &schema));
+ ASSERT_EQ(ArrowArrayViewInitFromSchema(&array_view, &schema, &error), NANOARROW_OK);
+ ASSERT_EQ(ArrowArrayViewSetArray(&array_view, &array, &error), NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayViewValidate(&array_view, NANOARROW_VALIDATION_LEVEL_FULL, &error),
+ NANOARROW_OK);
+
+ ArrowDecimal decimal;
+ ArrowDecimalInit(&decimal, 32, 8, 3);
+
+ // Elements 0 and 3 are the non-null ones; at scale 3 the expected unscaled
+ // integers are 1234 and -5678
+ ArrowArrayViewGetDecimalUnsafe(&array_view, 0, &decimal);
+ EXPECT_EQ(ArrowDecimalGetIntUnsafe(&decimal), 1234);
+
+ ArrowArrayViewGetDecimalUnsafe(&array_view, 3, &decimal);
+ EXPECT_EQ(ArrowDecimalGetIntUnsafe(&decimal), -5678);
+
+ ArrowArrayViewReset(&array_view);
+ ArrowSchemaRelease(&schema);
+ ArrowArrayRelease(&array);
+}
+
+// Builds a decimal64(10, 3) array [1.234, null, null, -5.678] with Arrow, exports it
+// with ExportArray() and reads the two non-null elements back through an
+// ArrowArrayView.
+TEST(ArrayViewTest, ArrayViewTestGetDecimal64) {
+ struct ArrowArray array;
+ struct ArrowSchema schema;
+ struct ArrowArrayView array_view;
+ struct ArrowError error;
+
+ auto type = decimal64(10, 3);
+
+ // Array with nulls
+ auto builder = Decimal64Builder(type);
+ ARROW_EXPECT_OK(builder.Append(*Decimal64::FromReal(1.234, 10, 3)));
+ ARROW_EXPECT_OK(builder.AppendNulls(2));
+ ARROW_EXPECT_OK(builder.Append(*Decimal64::FromReal(-5.678, 10, 3)));
+ auto maybe_arrow_array = builder.Finish();
+ ARROW_EXPECT_OK(maybe_arrow_array);
+ auto arrow_array = maybe_arrow_array.ValueUnsafe();
+
+ ARROW_EXPECT_OK(ExportArray(*arrow_array, &array, &schema));
+ ASSERT_EQ(ArrowArrayViewInitFromSchema(&array_view, &schema, &error), NANOARROW_OK);
+ ASSERT_EQ(ArrowArrayViewSetArray(&array_view, &array, &error), NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayViewValidate(&array_view, NANOARROW_VALIDATION_LEVEL_FULL, &error),
+ NANOARROW_OK);
+
+ ArrowDecimal decimal;
+ ArrowDecimalInit(&decimal, 64, 10, 3);
+
+ // Elements 0 and 3 are the non-null ones; at scale 3 the expected unscaled
+ // integers are 1234 and -5678
+ ArrowArrayViewGetDecimalUnsafe(&array_view, 0, &decimal);
+ EXPECT_EQ(ArrowDecimalGetIntUnsafe(&decimal), 1234);
+
+ ArrowArrayViewGetDecimalUnsafe(&array_view, 3, &decimal);
+ EXPECT_EQ(ArrowDecimalGetIntUnsafe(&decimal), -5678);
+
+ ArrowArrayViewReset(&array_view);
+ ArrowSchemaRelease(&schema);
+ ArrowArrayRelease(&array);
+}
+#endif
+
TEST(ArrayViewTest, ArrayViewTestGetDecimal128) {
struct ArrowArray array;
struct ArrowSchema schema;
diff --git a/src/nanoarrow/common/inline_array.h b/src/nanoarrow/common/inline_array.h
index e85228d..9fe5e0b 100644
--- a/src/nanoarrow/common/inline_array.h
+++ b/src/nanoarrow/common/inline_array.h
@@ -700,6 +700,22 @@
struct ArrowBuffer* data_buffer = ArrowArrayBuffer(array, 1);
switch (private_data->storage_type) {
+ case NANOARROW_TYPE_DECIMAL32:
+ if (value->n_words != 0) {
+ return EINVAL;
+ } else {
+ NANOARROW_RETURN_NOT_OK(
+ ArrowBufferAppend(data_buffer, value->words, sizeof(uint32_t)));
+ break;
+ }
+ case NANOARROW_TYPE_DECIMAL64:
+ if (value->n_words != 1) {
+ return EINVAL;
+ } else {
+ NANOARROW_RETURN_NOT_OK(
+ ArrowBufferAppend(data_buffer, value->words, sizeof(uint64_t)));
+ break;
+ }
case NANOARROW_TYPE_DECIMAL128:
if (value->n_words != 2) {
return EINVAL;
@@ -1267,6 +1283,12 @@
i += array_view->offset;
const uint8_t* data_view = array_view->buffer_views[1].data.as_uint8;
switch (array_view->storage_type) {
+ case NANOARROW_TYPE_DECIMAL32:
+ ArrowDecimalSetBytes(out, data_view + (i * 4));
+ break;
+ case NANOARROW_TYPE_DECIMAL64:
+ ArrowDecimalSetBytes(out, data_view + (i * 8));
+ break;
case NANOARROW_TYPE_DECIMAL128:
ArrowDecimalSetBytes(out, data_view + (i * 16));
break;
diff --git a/src/nanoarrow/common/inline_types.h b/src/nanoarrow/common/inline_types.h
index 0a60261..d4fdfba 100644
--- a/src/nanoarrow/common/inline_types.h
+++ b/src/nanoarrow/common/inline_types.h
@@ -453,7 +453,9 @@
NANOARROW_TYPE_INTERVAL_MONTH_DAY_NANO,
NANOARROW_TYPE_RUN_END_ENCODED,
NANOARROW_TYPE_BINARY_VIEW,
- NANOARROW_TYPE_STRING_VIEW
+ NANOARROW_TYPE_STRING_VIEW,
+ NANOARROW_TYPE_DECIMAL32,
+ NANOARROW_TYPE_DECIMAL64
};
/// \brief Get a string value of an enum ArrowType value
@@ -510,6 +512,10 @@
return "interval_months";
case NANOARROW_TYPE_INTERVAL_DAY_TIME:
return "interval_day_time";
+ case NANOARROW_TYPE_DECIMAL32:
+ return "decimal32";
+ case NANOARROW_TYPE_DECIMAL64:
+ return "decimal64";
case NANOARROW_TYPE_DECIMAL128:
return "decimal128";
case NANOARROW_TYPE_DECIMAL256:
@@ -890,7 +896,8 @@
/// values set using ArrowDecimalSetInt(), ArrowDecimalSetBytes128(),
/// or ArrowDecimalSetBytes256().
struct ArrowDecimal {
- /// \brief An array of 64-bit integers of n_words length defined in native-endian order
+ /// \brief An array of 64-bit integers of n_words length defined in native-endian order.
+ /// For a 32-bit decimal value, index 0 will be a 32-bit integer value.
uint64_t words[4];
/// \brief The number of significant digits this decimal number can represent
@@ -899,7 +906,8 @@
/// \brief The number of digits after the decimal point. This can be negative.
int32_t scale;
- /// \brief The number of words in the words array
+ /// \brief The number of 64-bit words in the words array. For the special case of a
+ /// 32-bit decimal value, this will be 0.
int n_words;
/// \brief Cached value used by the implementation
@@ -916,13 +924,14 @@
memset(decimal->words, 0, sizeof(decimal->words));
decimal->precision = precision;
decimal->scale = scale;
+ // n_words will be 0 for bitwidth == 32
decimal->n_words = (int)(bitwidth / 8 / sizeof(uint64_t));
if (_ArrowIsLittleEndian()) {
decimal->low_word_index = 0;
- decimal->high_word_index = decimal->n_words - 1;
+ decimal->high_word_index = decimal->n_words > 0 ? decimal->n_words - 1 : 0;
} else {
- decimal->low_word_index = decimal->n_words - 1;
+ decimal->low_word_index = decimal->n_words > 0 ? decimal->n_words - 1 : 0;
decimal->high_word_index = 0;
}
}
@@ -933,6 +942,9 @@
/// within the signed 64-bit integer range (A precision less than or equal
/// to 18 is sufficiently small).
static inline int64_t ArrowDecimalGetIntUnsafe(const struct ArrowDecimal* decimal) {
+ // n_words == 0 marks a 32-bit decimal: return the low 32 bits of words[0],
+ // reinterpreted as signed so that negative values are sign-extended to int64_t.
+ // NOTE(review): ArrowDecimalSetBytes() writes the first four bytes of words, which
+ // are the low 32 bits of words[0] only on little endian -- confirm big endian.
+ if (decimal->n_words == 0) {
+ return (int32_t)decimal->words[0];
+ }
return (int64_t)decimal->words[decimal->low_word_index];
}
@@ -940,18 +952,28 @@
/// \ingroup nanoarrow-utils
static inline void ArrowDecimalGetBytes(const struct ArrowDecimal* decimal,
uint8_t* out) {
- memcpy(out, decimal->words, decimal->n_words * sizeof(uint64_t));
+ if (decimal->n_words == 0) {
+ // A 32-bit decimal is four bytes wide (the same width ArrowDecimalSetBytes()
+ // reads), so copying a whole 64-bit word would write past a 4-byte out buffer.
+ memcpy(out, decimal->words, sizeof(int32_t));
+ } else {
+ // Wider decimals are made of n_words 64-bit words
+ memcpy(out, decimal->words, decimal->n_words * sizeof(uint64_t));
+ }
}
/// \brief Returns 1 if the value represented by decimal is >= 0 or -1 otherwise
/// \ingroup nanoarrow-utils
static inline int64_t ArrowDecimalSign(const struct ArrowDecimal* decimal) {
+ // 32-bit decimal (n_words == 0): shifting the signed low 32 bits of words[0] right
+ // by 31 is expected to give 0 or -1 from the sign bit; OR-ing with 1 maps that to
+ // 1 or -1. The general case below does the same with the high 64-bit word.
+ if (decimal->n_words == 0) {
+ return 1 | ((int32_t)(decimal->words[0]) >> 31);
+ }
+
return 1 | ((int64_t)(decimal->words[decimal->high_word_index]) >> 63);
}
/// \brief Sets the integer value of this decimal
/// \ingroup nanoarrow-utils
static inline void ArrowDecimalSetInt(struct ArrowDecimal* decimal, int64_t value) {
+ if (decimal->n_words == 0) {
+ decimal->words[0] = (int32_t)value;
+ return;
+ }
+
if (value < 0) {
memset(decimal->words, 0xff, decimal->n_words * sizeof(uint64_t));
} else {
@@ -964,6 +986,13 @@
/// \brief Negate the value of this decimal in place
/// \ingroup nanoarrow-utils
static inline void ArrowDecimalNegate(struct ArrowDecimal* decimal) {
+ if (decimal->n_words == 0) {
+ uint32_t elem = (uint32_t)decimal->words[0];
+ elem = ~elem + 1;
+ decimal->words[0] = (int32_t)elem;
+ return;
+ }
+
uint64_t carry = 1;
if (decimal->low_word_index == 0) {
@@ -987,7 +1016,11 @@
/// \ingroup nanoarrow-utils
static inline void ArrowDecimalSetBytes(struct ArrowDecimal* decimal,
const uint8_t* value) {
- memcpy(decimal->words, value, decimal->n_words * sizeof(uint64_t));
+ // n_words == 0 marks a 32-bit decimal, which reads exactly four bytes from value
+ // into the start of words; wider decimals read n_words 64-bit words.
+ if (decimal->n_words == 0) {
+ memcpy(decimal->words, value, sizeof(int32_t));
+ } else {
+ memcpy(decimal->words, value, decimal->n_words * sizeof(uint64_t));
+ }
}
#ifdef __cplusplus
diff --git a/src/nanoarrow/common/schema.c b/src/nanoarrow/common/schema.c
index 28fb338..b0e538c 100644
--- a/src/nanoarrow/common/schema.c
+++ b/src/nanoarrow/common/schema.c
@@ -271,6 +271,14 @@
char buffer[64];
int n_chars;
switch (type) {
+ case NANOARROW_TYPE_DECIMAL32:
+ n_chars = snprintf(buffer, sizeof(buffer), "d:%d,%d,32", decimal_precision,
+ decimal_scale);
+ break;
+ case NANOARROW_TYPE_DECIMAL64:
+ n_chars = snprintf(buffer, sizeof(buffer), "d:%d,%d,64", decimal_precision,
+ decimal_scale);
+ break;
case NANOARROW_TYPE_DECIMAL128:
n_chars =
snprintf(buffer, sizeof(buffer), "d:%d,%d", decimal_precision, decimal_scale);
@@ -721,6 +729,12 @@
*format_end_out = parse_end;
switch (schema_view->decimal_bitwidth) {
+ case 32:
+ ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_DECIMAL32);
+ return NANOARROW_OK;
+ case 64:
+ ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_DECIMAL64);
+ return NANOARROW_OK;
case 128:
ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_DECIMAL128);
return NANOARROW_OK;
@@ -1157,6 +1171,8 @@
case NANOARROW_TYPE_HALF_FLOAT:
case NANOARROW_TYPE_FLOAT:
case NANOARROW_TYPE_DOUBLE:
+ case NANOARROW_TYPE_DECIMAL32:
+ case NANOARROW_TYPE_DECIMAL64:
case NANOARROW_TYPE_DECIMAL128:
case NANOARROW_TYPE_DECIMAL256:
case NANOARROW_TYPE_STRING:
@@ -1316,6 +1332,8 @@
char* out, int64_t n) {
const char* type_string = ArrowTypeString(schema_view->type);
switch (schema_view->type) {
+ case NANOARROW_TYPE_DECIMAL32:
+ case NANOARROW_TYPE_DECIMAL64:
case NANOARROW_TYPE_DECIMAL128:
case NANOARROW_TYPE_DECIMAL256:
return snprintf(out, n, "%s(%" PRId32 ", %" PRId32 ")", type_string,
diff --git a/src/nanoarrow/common/schema_test.cc b/src/nanoarrow/common/schema_test.cc
index f05f5e0..d620a47 100644
--- a/src/nanoarrow/common/schema_test.cc
+++ b/src/nanoarrow/common/schema_test.cc
@@ -248,6 +248,30 @@
arrow_type = ImportType(&schema);
ARROW_EXPECT_OK(arrow_type);
EXPECT_TRUE(arrow_type.ValueUnsafe()->Equals(decimal256(3, 4)));
+
+ // decimal32: the format string carries an explicit bitwidth suffix
+ ArrowSchemaInit(&schema);
+ EXPECT_EQ(ArrowSchemaSetTypeDecimal(&schema, NANOARROW_TYPE_DECIMAL32, 3, 4),
+ NANOARROW_OK);
+ EXPECT_STREQ(schema.format, "d:3,4,32");
+ // Arrow spells its version macro ARROW_VERSION_MAJOR; an undefined name evaluates
+ // to 0 in #if, which would silently compile out the import check below.
+#if ARROW_VERSION_MAJOR >= 18
+ arrow_type = ImportType(&schema);
+ ARROW_EXPECT_OK(arrow_type);
+ EXPECT_TRUE(arrow_type.ValueUnsafe()->Equals(decimal32(3, 4)));
+#else
+ ArrowSchemaRelease(&schema);
+#endif
+
+ // decimal64: the format string carries an explicit bitwidth suffix
+ ArrowSchemaInit(&schema);
+ EXPECT_EQ(ArrowSchemaSetTypeDecimal(&schema, NANOARROW_TYPE_DECIMAL64, 3, 4),
+ NANOARROW_OK);
+ EXPECT_STREQ(schema.format, "d:3,4,64");
+ // Arrow spells its version macro ARROW_VERSION_MAJOR; an undefined name evaluates
+ // to 0 in #if, which would silently compile out the import check below.
+#if ARROW_VERSION_MAJOR >= 18
+ arrow_type = ImportType(&schema);
+ ARROW_EXPECT_OK(arrow_type);
+ EXPECT_TRUE(arrow_type.ValueUnsafe()->Equals(decimal64(3, 4)));
+#else
+ ArrowSchemaRelease(&schema);
+#endif
#else
ArrowSchemaRelease(&schema);
#endif
@@ -785,6 +809,46 @@
struct ArrowSchemaView schema_view;
struct ArrowError error;
+// Arrow spells its version macro ARROW_VERSION_MAJOR; an undefined name evaluates to
+// 0 in #if, which would silently compile out the decimal32/decimal64 checks below.
+#if ARROW_VERSION_MAJOR >= 18
+ ARROW_EXPECT_OK(ExportType(*decimal32(5, 6), &schema));
+ EXPECT_EQ(ArrowSchemaViewInit(&schema_view, &schema, &error), NANOARROW_OK);
+ EXPECT_EQ(schema_view.type, NANOARROW_TYPE_DECIMAL32);
+ EXPECT_EQ(schema_view.storage_type, NANOARROW_TYPE_DECIMAL32);
+ EXPECT_EQ(schema_view.layout.buffer_type[0], NANOARROW_BUFFER_TYPE_VALIDITY);
+ EXPECT_EQ(schema_view.layout.buffer_type[1], NANOARROW_BUFFER_TYPE_DATA);
+ EXPECT_EQ(schema_view.layout.buffer_type[2], NANOARROW_BUFFER_TYPE_NONE);
+ EXPECT_EQ(schema_view.layout.buffer_data_type[0], NANOARROW_TYPE_BOOL);
+ EXPECT_EQ(schema_view.layout.buffer_data_type[1], NANOARROW_TYPE_DECIMAL32);
+ EXPECT_EQ(schema_view.layout.buffer_data_type[2], NANOARROW_TYPE_UNINITIALIZED);
+ EXPECT_EQ(schema_view.layout.element_size_bits[0], 1);
+ EXPECT_EQ(schema_view.layout.element_size_bits[1], 32);
+ EXPECT_EQ(schema_view.layout.element_size_bits[2], 0);
+ EXPECT_EQ(schema_view.decimal_bitwidth, 32);
+ EXPECT_EQ(schema_view.decimal_precision, 5);
+ EXPECT_EQ(schema_view.decimal_scale, 6);
+ EXPECT_EQ(ArrowSchemaToStdString(&schema), "decimal32(5, 6)");
+ ArrowSchemaRelease(&schema);
+
+ ARROW_EXPECT_OK(ExportType(*decimal64(5, 6), &schema));
+ EXPECT_EQ(ArrowSchemaViewInit(&schema_view, &schema, &error), NANOARROW_OK);
+ EXPECT_EQ(schema_view.type, NANOARROW_TYPE_DECIMAL64);
+ EXPECT_EQ(schema_view.storage_type, NANOARROW_TYPE_DECIMAL64);
+ EXPECT_EQ(schema_view.layout.buffer_type[0], NANOARROW_BUFFER_TYPE_VALIDITY);
+ EXPECT_EQ(schema_view.layout.buffer_type[1], NANOARROW_BUFFER_TYPE_DATA);
+ EXPECT_EQ(schema_view.layout.buffer_type[2], NANOARROW_BUFFER_TYPE_NONE);
+ EXPECT_EQ(schema_view.layout.buffer_data_type[0], NANOARROW_TYPE_BOOL);
+ EXPECT_EQ(schema_view.layout.buffer_data_type[1], NANOARROW_TYPE_DECIMAL64);
+ EXPECT_EQ(schema_view.layout.buffer_data_type[2], NANOARROW_TYPE_UNINITIALIZED);
+ EXPECT_EQ(schema_view.layout.element_size_bits[0], 1);
+ EXPECT_EQ(schema_view.layout.element_size_bits[1], 64);
+ EXPECT_EQ(schema_view.layout.element_size_bits[2], 0);
+ EXPECT_EQ(schema_view.decimal_bitwidth, 64);
+ EXPECT_EQ(schema_view.decimal_precision, 5);
+ EXPECT_EQ(schema_view.decimal_scale, 6);
+ EXPECT_EQ(ArrowSchemaToStdString(&schema), "decimal64(5, 6)");
+ ArrowSchemaRelease(&schema);
+#endif
+
ARROW_EXPECT_OK(ExportType(*decimal128(5, 6), &schema));
EXPECT_EQ(ArrowSchemaViewInit(&schema_view, &schema, &error), NANOARROW_OK);
EXPECT_EQ(schema_view.type, NANOARROW_TYPE_DECIMAL128);
diff --git a/src/nanoarrow/common/utils.c b/src/nanoarrow/common/utils.c
index 7be65ea..400625f 100644
--- a/src/nanoarrow/common/utils.c
+++ b/src/nanoarrow/common/utils.c
@@ -111,6 +111,7 @@
case NANOARROW_TYPE_UINT32:
case NANOARROW_TYPE_INT32:
case NANOARROW_TYPE_FLOAT:
+ case NANOARROW_TYPE_DECIMAL32:
layout->element_size_bits[1] = 32;
break;
case NANOARROW_TYPE_INTERVAL_MONTHS:
@@ -122,6 +123,7 @@
case NANOARROW_TYPE_INT64:
case NANOARROW_TYPE_DOUBLE:
case NANOARROW_TYPE_INTERVAL_DAY_TIME:
+ case NANOARROW_TYPE_DECIMAL64:
layout->element_size_bits[1] = 64;
break;
@@ -326,7 +328,7 @@
// Use 32-bit words for portability
uint32_t words32[8];
- int n_words32 = decimal->n_words * 2;
+ int n_words32 = decimal->n_words > 0 ? decimal->n_words * 2 : 1;
NANOARROW_DCHECK(n_words32 <= 8);
memset(words32, 0, sizeof(words32));
@@ -356,11 +358,14 @@
// https://github.com/apache/arrow/blob/cd3321b28b0c9703e5d7105d6146c1270bbadd7f/cpp/src/arrow/util/decimal.cc#L365
ArrowErrorCode ArrowDecimalAppendDigitsToBuffer(const struct ArrowDecimal* decimal,
struct ArrowBuffer* buffer) {
- NANOARROW_DCHECK(decimal->n_words == 2 || decimal->n_words == 4);
+ NANOARROW_DCHECK(decimal->n_words == 0 || decimal->n_words == 1 ||
+ decimal->n_words == 2 || decimal->n_words == 4);
int is_negative = ArrowDecimalSign(decimal) < 0;
uint64_t words_little_endian[4];
- if (decimal->low_word_index == 0) {
+ if (decimal->n_words == 0) {
+ // Only four bytes are copied below; clear the word first so its upper half is
+ // not left indeterminate when the full 64-bit word is read later.
+ words_little_endian[0] = 0;
+ memcpy(words_little_endian, decimal->words, sizeof(uint32_t));
+ } else if (decimal->low_word_index == 0) {
memcpy(words_little_endian, decimal->words, decimal->n_words * sizeof(uint64_t));
} else {
for (int i = 0; i < decimal->n_words; i++) {
@@ -370,21 +375,33 @@
// We've already made a copy, so negate that if needed
if (is_negative) {
- uint64_t carry = 1;
- for (int i = 0; i < decimal->n_words; i++) {
- uint64_t elem = words_little_endian[i];
- elem = ~elem + carry;
- carry &= (elem == 0);
- words_little_endian[i] = elem;
+ if (decimal->n_words == 0) {
+ // Two's complement negation of the low 32 bits gives the magnitude
+ uint32_t elem = (uint32_t)words_little_endian[0];
+ elem = ~elem + 1;
+ // Store the magnitude zero-extended: going through int32_t would sign-extend
+ // 0x80000000 (the negation of INT32_MIN) into the upper half of the word.
+ words_little_endian[0] = elem;
+ } else {
+ uint64_t carry = 1;
+ for (int i = 0; i < decimal->n_words; i++) {
+ uint64_t elem = words_little_endian[i];
+ elem = ~elem + carry;
+ carry &= (elem == 0);
+ words_little_endian[i] = elem;
+ }
}
}
// Find the most significant word that is non-zero
int most_significant_elem_idx = -1;
- for (int i = decimal->n_words - 1; i >= 0; i--) {
- if (words_little_endian[i] != 0) {
- most_significant_elem_idx = i;
- break;
+ if (decimal->n_words == 0) {
+ if (words_little_endian[0] != 0) {
+ most_significant_elem_idx = 0;
+ }
+ } else {
+ for (int i = decimal->n_words - 1; i >= 0; i--) {
+ if (words_little_endian[i] != 0) {
+ most_significant_elem_idx = i;
+ break;
+ }
}
}
diff --git a/src/nanoarrow/common/utils_test.cc b/src/nanoarrow/common/utils_test.cc
index be7ff75..7c86ca5 100644
--- a/src/nanoarrow/common/utils_test.cc
+++ b/src/nanoarrow/common/utils_test.cc
@@ -256,6 +256,86 @@
#endif
}
+// Checks ArrowDecimalInit() for the 32-bit width and that ArrowDecimalSetInt(),
+// ArrowDecimalGetIntUnsafe() and ArrowDecimalSign() agree for a positive and a
+// negative value. With Arrow >= 18 the stored bytes are also compared against
+// Decimal32::ToBytes() output.
+TEST(DecimalTest, Decimal32Test) {
+ struct ArrowDecimal decimal;
+ ArrowDecimalInit(&decimal, 32, 8, 3);
+
+ // The 32-bit width is narrower than one 64-bit word, so n_words is 0
+ EXPECT_EQ(decimal.n_words, 0);
+ EXPECT_EQ(decimal.precision, 8);
+ EXPECT_EQ(decimal.scale, 3);
+
+#if defined(NANOARROW_BUILD_TESTS_WITH_ARROW) && ARROW_VERSION_MAJOR >= 18
+ // Reference byte patterns produced by Arrow for the values set below
+ auto dec_pos = *Decimal32::FromString("12.345");
+ uint8_t bytes_pos[4];
+ dec_pos.ToBytes(bytes_pos);
+
+ auto dec_neg = *Decimal32::FromString("-34.567");
+ uint8_t bytes_neg[4];
+ dec_neg.ToBytes(bytes_neg);
+#endif
+
+ ArrowDecimalSetInt(&decimal, 12345);
+ EXPECT_EQ(ArrowDecimalGetIntUnsafe(&decimal), 12345);
+ EXPECT_EQ(ArrowDecimalSign(&decimal), 1);
+#if defined(NANOARROW_BUILD_TESTS_WITH_ARROW) && ARROW_VERSION_MAJOR >= 18
+ // Both ArrowDecimalSetInt() and ArrowDecimalSetBytes() must leave Arrow's bytes
+ // at the start of words
+ EXPECT_EQ(memcmp(decimal.words, bytes_pos, sizeof(bytes_pos)), 0);
+ ArrowDecimalSetBytes(&decimal, bytes_pos);
+ EXPECT_EQ(memcmp(decimal.words, bytes_pos, sizeof(bytes_pos)), 0);
+#endif
+
+ ArrowDecimalSetInt(&decimal, -34567);
+ EXPECT_EQ(ArrowDecimalGetIntUnsafe(&decimal), -34567);
+ EXPECT_EQ(ArrowDecimalSign(&decimal), -1);
+#if defined(NANOARROW_BUILD_TESTS_WITH_ARROW) && ARROW_VERSION_MAJOR >= 18
+ EXPECT_EQ(memcmp(decimal.words, bytes_neg, sizeof(bytes_neg)), 0);
+ ArrowDecimalSetBytes(&decimal, bytes_neg);
+ EXPECT_EQ(memcmp(decimal.words, bytes_neg, sizeof(bytes_neg)), 0);
+#endif
+}
+
+// Checks ArrowDecimalInit() for the 64-bit width and that ArrowDecimalSetInt(),
+// ArrowDecimalGetIntUnsafe() and ArrowDecimalSign() agree for a positive and a
+// negative value. With Arrow >= 18 the stored bytes are also compared against
+// Decimal64::ToBytes() output.
+TEST(DecimalTest, Decimal64Test) {
+ struct ArrowDecimal decimal;
+ ArrowDecimalInit(&decimal, 64, 10, 3);
+
+ EXPECT_EQ(decimal.n_words, 1);
+ EXPECT_EQ(decimal.precision, 10);
+ EXPECT_EQ(decimal.scale, 3);
+
+ // With a single word the low and high word indices span exactly n_words words
+ if (_ArrowIsLittleEndian()) {
+ EXPECT_EQ(decimal.high_word_index - decimal.low_word_index + 1, decimal.n_words);
+ } else {
+ EXPECT_EQ(decimal.low_word_index - decimal.high_word_index + 1, decimal.n_words);
+ }
+
+#if defined(NANOARROW_BUILD_TESTS_WITH_ARROW) && ARROW_VERSION_MAJOR >= 18
+ // Reference byte patterns produced by Arrow for the values set below
+ auto dec_pos = *Decimal64::FromString("12.345");
+ uint8_t bytes_pos[8];
+ dec_pos.ToBytes(bytes_pos);
+
+ auto dec_neg = *Decimal64::FromString("-34.567");
+ uint8_t bytes_neg[8];
+ dec_neg.ToBytes(bytes_neg);
+#endif
+
+ ArrowDecimalSetInt(&decimal, 12345);
+ EXPECT_EQ(ArrowDecimalGetIntUnsafe(&decimal), 12345);
+ EXPECT_EQ(ArrowDecimalSign(&decimal), 1);
+#if defined(NANOARROW_BUILD_TESTS_WITH_ARROW) && ARROW_VERSION_MAJOR >= 18
+ // Both ArrowDecimalSetInt() and ArrowDecimalSetBytes() must leave Arrow's bytes
+ // in words
+ EXPECT_EQ(memcmp(decimal.words, bytes_pos, sizeof(bytes_pos)), 0);
+ ArrowDecimalSetBytes(&decimal, bytes_pos);
+ EXPECT_EQ(memcmp(decimal.words, bytes_pos, sizeof(bytes_pos)), 0);
+#endif
+
+ ArrowDecimalSetInt(&decimal, -34567);
+ EXPECT_EQ(ArrowDecimalGetIntUnsafe(&decimal), -34567);
+ EXPECT_EQ(ArrowDecimalSign(&decimal), -1);
+#if defined(NANOARROW_BUILD_TESTS_WITH_ARROW) && ARROW_VERSION_MAJOR >= 18
+ EXPECT_EQ(memcmp(decimal.words, bytes_neg, sizeof(bytes_neg)), 0);
+ ArrowDecimalSetBytes(&decimal, bytes_neg);
+ EXPECT_EQ(memcmp(decimal.words, bytes_neg, sizeof(bytes_neg)), 0);
+#endif
+}
+
TEST(DecimalTest, Decimal128Test) {
struct ArrowDecimal decimal;
ArrowDecimalInit(&decimal, 128, 10, 3);
@@ -302,8 +382,12 @@
struct ArrowBuffer buffer;
ArrowBufferInit(&buffer);
- for (auto bitwidth : {128, 256}) {
- ArrowDecimalInit(&decimal, bitwidth, 39, 0);
+ for (auto bitwidth : {32, 64, 128, 256}) {
+ if (bitwidth > 64) {
+ ArrowDecimalInit(&decimal, bitwidth, 39, 0);
+ } else {
+ ArrowDecimalInit(&decimal, bitwidth, 8, 3);
+ }
// Check with a value whose value is contained entirely in the least significant digit
ArrowDecimalSetInt(&decimal, 12345);
@@ -314,25 +398,41 @@
// Check with a value whose negative value will carry into a more significant digit
memset(decimal.words, 0, sizeof(decimal.words));
- decimal.words[decimal.low_word_index] = std::numeric_limits<uint64_t>::max();
+ if (bitwidth > 64) {
+ decimal.words[decimal.low_word_index] = std::numeric_limits<uint64_t>::max();
+ } else if (bitwidth == 64) {
+ decimal.words[decimal.low_word_index] = std::numeric_limits<int64_t>::max();
+ } else {
+ decimal.words[decimal.low_word_index] = std::numeric_limits<int32_t>::max();
+ }
ASSERT_EQ(ArrowDecimalSign(&decimal), 1);
ArrowDecimalNegate(&decimal);
ASSERT_EQ(ArrowDecimalSign(&decimal), -1);
ArrowDecimalNegate(&decimal);
ASSERT_EQ(ArrowDecimalSign(&decimal), 1);
- EXPECT_EQ(decimal.words[decimal.low_word_index],
- std::numeric_limits<uint64_t>::max());
+ if (bitwidth > 64) {
+ EXPECT_EQ(decimal.words[decimal.low_word_index],
+ std::numeric_limits<uint64_t>::max());
+ } else if (bitwidth == 64) {
+ EXPECT_EQ(decimal.words[decimal.low_word_index],
+ std::numeric_limits<int64_t>::max());
+ } else {
+ EXPECT_EQ(decimal.words[decimal.low_word_index],
+ std::numeric_limits<int32_t>::max());
+ }
- // Check with a large value that fits in the 128 bit size
- ASSERT_EQ(
- ArrowDecimalSetDigits(&decimal, "123456789012345678901234567890123456789"_asv),
- NANOARROW_OK);
- ArrowDecimalNegate(&decimal);
+ if (bitwidth > 64) {
+ // Check with a large value that fits in the 128 bit size
+ ASSERT_EQ(
+ ArrowDecimalSetDigits(&decimal, "123456789012345678901234567890123456789"_asv),
+ NANOARROW_OK);
+ ArrowDecimalNegate(&decimal);
- buffer.size_bytes = 0;
- ASSERT_EQ(ArrowDecimalAppendDigitsToBuffer(&decimal, &buffer), NANOARROW_OK);
- EXPECT_EQ(std::string(reinterpret_cast<char*>(buffer.data), buffer.size_bytes),
- "-123456789012345678901234567890123456789");
+ buffer.size_bytes = 0;
+ ASSERT_EQ(ArrowDecimalAppendDigitsToBuffer(&decimal, &buffer), NANOARROW_OK);
+ EXPECT_EQ(std::string(reinterpret_cast<char*>(buffer.data), buffer.size_bytes),
+ "-123456789012345678901234567890123456789");
+ }
}
// Check with a large value that only fits in the 256 bit range
diff --git a/src/nanoarrow/ipc/decoder.c b/src/nanoarrow/ipc/decoder.c
index d2bad28..b74b4e0 100644
--- a/src/nanoarrow/ipc/decoder.c
+++ b/src/nanoarrow/ipc/decoder.c
@@ -429,6 +429,14 @@
int result;
switch (bitwidth) {
+ case 32:
+ result =
+ ArrowSchemaSetTypeDecimal(schema, NANOARROW_TYPE_DECIMAL32, precision, scale);
+ break;
+ case 64:
+ result =
+ ArrowSchemaSetTypeDecimal(schema, NANOARROW_TYPE_DECIMAL64, precision, scale);
+ break;
case 128:
result =
ArrowSchemaSetTypeDecimal(schema, NANOARROW_TYPE_DECIMAL128, precision, scale);
@@ -1538,13 +1546,21 @@
}
switch (src->data_type) {
+ case NANOARROW_TYPE_DECIMAL32: {
+ uint32_t* ptr = (uint32_t*)dst->data;
+ for (int64_t i = 0; i < (dst->size_bytes / 4); i++) {
+ ptr[i] = bswap32(out_view->data.as_uint32[i]);
+ }
+ break;
+ }
+ case NANOARROW_TYPE_DECIMAL64:
case NANOARROW_TYPE_DECIMAL128:
case NANOARROW_TYPE_DECIMAL256: {
const uint64_t* ptr_src = out_view->data.as_uint64;
uint64_t* ptr_dst = (uint64_t*)dst->data;
uint64_t words[4];
int n_words = (int)(src->element_size_bits / 64);
- NANOARROW_DCHECK(n_words == 2 || n_words == 4);
+ NANOARROW_DCHECK(n_words == 1 || n_words == 2 || n_words == 4);
for (int64_t i = 0; i < (dst->size_bytes / n_words / 8); i++) {
for (int j = 0; j < n_words; j++) {
diff --git a/src/nanoarrow/ipc/encoder.c b/src/nanoarrow/ipc/encoder.c
index 6e920cd..d587614 100644
--- a/src/nanoarrow/ipc/encoder.c
+++ b/src/nanoarrow/ipc/encoder.c
@@ -184,6 +184,8 @@
Field_type_FloatingPoint_create(builder, ns(Precision_DOUBLE)), error);
return NANOARROW_OK;
+ case NANOARROW_TYPE_DECIMAL32:
+ case NANOARROW_TYPE_DECIMAL64:
case NANOARROW_TYPE_DECIMAL128:
case NANOARROW_TYPE_DECIMAL256:
FLATCC_RETURN_UNLESS_0(
diff --git a/src/nanoarrow/nanoarrow.h b/src/nanoarrow/nanoarrow.h
index 312ecbe..3ca294f 100644
--- a/src/nanoarrow/nanoarrow.h
+++ b/src/nanoarrow/nanoarrow.h
@@ -369,8 +369,9 @@
/// \brief Set the format field of a decimal schema
///
/// Returns EINVAL for scale <= 0 or for type that is not
-/// NANOARROW_TYPE_DECIMAL128 or NANOARROW_TYPE_DECIMAL256. Schema must have been
-/// initialized using ArrowSchemaInit() or ArrowSchemaDeepCopy().
+/// NANOARROW_TYPE_DECIMAL32, NANOARROW_TYPE_DECIMAL64, NANOARROW_TYPE_DECIMAL128 or
+/// NANOARROW_TYPE_DECIMAL256. Schema must have been initialized using
+/// ArrowSchemaInit() or ArrowSchemaDeepCopy().
ArrowErrorCode ArrowSchemaSetTypeDecimal(struct ArrowSchema* schema, enum ArrowType type,
int32_t decimal_precision,
int32_t decimal_scale);