cpp/src/arrow/util/decimal.cc - arrow - Git at Google

 // Licensed to the Apache Software Foundation (ASF) under one
 // or more contributor license agreements.  See the NOTICE file
 // distributed with this work for additional information
 // regarding copyright ownership.  The ASF licenses this file
 // to you under the Apache License, Version 2.0 (the
 // "License"); you may not use this file except in compliance
 // with the License.  You may obtain a copy of the License at
 //
 //   http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing,
 // software distributed under the License is distributed on an
 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 // KIND, either express or implied.  See the License for the
 // specific language governing permissions and limitations
 // under the License.

 #include <algorithm>
 #include <array>
 #include <climits>
 #include <cmath>
 #include <cstdint>
 #include <cstdlib>
 #include <cstring>
 #include <iomanip>
 #include <limits>
 #include <ostream>
 #include <sstream>
 #include <string>

 #include "arrow/status.h"
 #include "arrow/util/decimal.h"
 #include "arrow/util/endian.h"
 #include "arrow/util/formatting.h"
 #include "arrow/util/int128_internal.h"
 #include "arrow/util/int_util_internal.h"
 #include "arrow/util/logging.h"
 #include "arrow/util/macros.h"
 #include "arrow/util/value_parsing.h"

 namespace arrow {

 using internal::SafeLeftShift;
 using internal::SafeSignedAdd;
 using internal::uint128_t;

 Decimal128::Decimal128(const std::string& str) : Decimal128() {
   *this = Decimal128::FromString(str).ValueOrDie();
 }

 static constexpr auto kInt64DecimalDigits =
     static_cast<size_t>(std::numeric_limits<int64_t>::digits10);

 static constexpr uint64_t kUInt64PowersOfTen[kInt64DecimalDigits + 1] = {
     // clang-format off
     1ULL,
     10ULL,
     100ULL,
     1000ULL,
     10000ULL,
     100000ULL,
     1000000ULL,
     10000000ULL,
     100000000ULL,
     1000000000ULL,
     10000000000ULL,
     100000000000ULL,
     1000000000000ULL,
     10000000000000ULL,
     100000000000000ULL,
     1000000000000000ULL,
     10000000000000000ULL,
     100000000000000000ULL,
     1000000000000000000ULL
     // clang-format on
 };

 static constexpr float kFloatPowersOfTen[2 * 38 + 1] = {
     1e-38f, 1e-37f, 1e-36f, 1e-35f, 1e-34f, 1e-33f, 1e-32f, 1e-31f, 1e-30f, 1e-29f,
     1e-28f, 1e-27f, 1e-26f, 1e-25f, 1e-24f, 1e-23f, 1e-22f, 1e-21f, 1e-20f, 1e-19f,
     1e-18f, 1e-17f, 1e-16f, 1e-15f, 1e-14f, 1e-13f, 1e-12f, 1e-11f, 1e-10f, 1e-9f,
     1e-8f,  1e-7f,  1e-6f,  1e-5f,  1e-4f,  1e-3f,  1e-2f,  1e-1f,  1e0f,   1e1f,
     1e2f,   1e3f,   1e4f,   1e5f,   1e6f,   1e7f,   1e8f,   1e9f,   1e10f,  1e11f,
     1e12f,  1e13f,  1e14f,  1e15f,  1e16f,  1e17f,  1e18f,  1e19f,  1e20f,  1e21f,
     1e22f,  1e23f,  1e24f,  1e25f,  1e26f,  1e27f,  1e28f,  1e29f,  1e30f,  1e31f,
     1e32f,  1e33f,  1e34f,  1e35f,  1e36f,  1e37f,  1e38f};

 static constexpr double kDoublePowersOfTen[2 * 38 + 1] = {
     1e-38, 1e-37, 1e-36, 1e-35, 1e-34, 1e-33, 1e-32, 1e-31, 1e-30, 1e-29, 1e-28,
     1e-27, 1e-26, 1e-25, 1e-24, 1e-23, 1e-22, 1e-21, 1e-20, 1e-19, 1e-18, 1e-17,
     1e-16, 1e-15, 1e-14, 1e-13, 1e-12, 1e-11, 1e-10, 1e-9,  1e-8,  1e-7,  1e-6,
     1e-5,  1e-4,  1e-3,  1e-2,  1e-1,  1e0,   1e1,   1e2,   1e3,   1e4,   1e5,
     1e6,   1e7,   1e8,   1e9,   1e10,  1e11,  1e12,  1e13,  1e14,  1e15,  1e16,
     1e17,  1e18,  1e19,  1e20,  1e21,  1e22,  1e23,  1e24,  1e25,  1e26,  1e27,
     1e28,  1e29,  1e30,  1e31,  1e32,  1e33,  1e34,  1e35,  1e36,  1e37,  1e38};

 // On the Windows R toolchain, INFINITY is double type instead of float
 static constexpr float kFloatInf = std::numeric_limits<float>::infinity();
 static constexpr float kFloatPowersOfTen76[2 * 76 + 1] = {
     0,         0,         0,         0,         0,         0,         0,
     0,         0,         0,         0,         0,         0,         0,
     0,         0,         0,         0,         0,         0,         0,
     0,         0,         0,         0,         0,         0,         0,
     0,         0,         0,         1e-45f,    1e-44f,    1e-43f,    1e-42f,
     1e-41f,    1e-40f,    1e-39f,    1e-38f,    1e-37f,    1e-36f,    1e-35f,
     1e-34f,    1e-33f,    1e-32f,    1e-31f,    1e-30f,    1e-29f,    1e-28f,
     1e-27f,    1e-26f,    1e-25f,    1e-24f,    1e-23f,    1e-22f,    1e-21f,
     1e-20f,    1e-19f,    1e-18f,    1e-17f,    1e-16f,    1e-15f,    1e-14f,
     1e-13f,    1e-12f,    1e-11f,    1e-10f,    1e-9f,     1e-8f,     1e-7f,
     1e-6f,     1e-5f,     1e-4f,     1e-3f,     1e-2f,     1e-1f,     1e0f,
     1e1f,      1e2f,      1e3f,      1e4f,      1e5f,      1e6f,      1e7f,
     1e8f,      1e9f,      1e10f,     1e11f,     1e12f,     1e13f,     1e14f,
     1e15f,     1e16f,     1e17f,     1e18f,     1e19f,     1e20f,     1e21f,
     1e22f,     1e23f,     1e24f,     1e25f,     1e26f,     1e27f,     1e28f,
     1e29f,     1e30f,     1e31f,     1e32f,     1e33f,     1e34f,     1e35f,
     1e36f,     1e37f,     1e38f,     kFloatInf, kFloatInf, kFloatInf, kFloatInf,
     kFloatInf, kFloatInf, kFloatInf, kFloatInf, kFloatInf, kFloatInf, kFloatInf,
     kFloatInf, kFloatInf, kFloatInf, kFloatInf, kFloatInf, kFloatInf, kFloatInf,
     kFloatInf, kFloatInf, kFloatInf, kFloatInf, kFloatInf, kFloatInf, kFloatInf,
     kFloatInf, kFloatInf, kFloatInf, kFloatInf, kFloatInf, kFloatInf, kFloatInf,
     kFloatInf, kFloatInf, kFloatInf, kFloatInf, kFloatInf, kFloatInf};

 static constexpr double kDoublePowersOfTen76[2 * 76 + 1] = {
     1e-76, 1e-75, 1e-74, 1e-73, 1e-72, 1e-71, 1e-70, 1e-69, 1e-68, 1e-67, 1e-66, 1e-65,
     1e-64, 1e-63, 1e-62, 1e-61, 1e-60, 1e-59, 1e-58, 1e-57, 1e-56, 1e-55, 1e-54, 1e-53,
     1e-52, 1e-51, 1e-50, 1e-49, 1e-48, 1e-47, 1e-46, 1e-45, 1e-44, 1e-43, 1e-42, 1e-41,
     1e-40, 1e-39, 1e-38, 1e-37, 1e-36, 1e-35, 1e-34, 1e-33, 1e-32, 1e-31, 1e-30, 1e-29,
     1e-28, 1e-27, 1e-26, 1e-25, 1e-24, 1e-23, 1e-22, 1e-21, 1e-20, 1e-19, 1e-18, 1e-17,
     1e-16, 1e-15, 1e-14, 1e-13, 1e-12, 1e-11, 1e-10, 1e-9,  1e-8,  1e-7,  1e-6,  1e-5,
     1e-4,  1e-3,  1e-2,  1e-1,  1e0,   1e1,   1e2,   1e3,   1e4,   1e5,   1e6,   1e7,
     1e8,   1e9,   1e10,  1e11,  1e12,  1e13,  1e14,  1e15,  1e16,  1e17,  1e18,  1e19,
     1e20,  1e21,  1e22,  1e23,  1e24,  1e25,  1e26,  1e27,  1e28,  1e29,  1e30,  1e31,
     1e32,  1e33,  1e34,  1e35,  1e36,  1e37,  1e38,  1e39,  1e40,  1e41,  1e42,  1e43,
     1e44,  1e45,  1e46,  1e47,  1e48,  1e49,  1e50,  1e51,  1e52,  1e53,  1e54,  1e55,
     1e56,  1e57,  1e58,  1e59,  1e60,  1e61,  1e62,  1e63,  1e64,  1e65,  1e66,  1e67,
     1e68,  1e69,  1e70,  1e71,  1e72,  1e73,  1e74,  1e75,  1e76};

 namespace {

 template <typename Real, typename Derived>
 struct DecimalRealConversion {
   static Result<Decimal128> FromPositiveReal(Real real, int32_t precision,
                                              int32_t scale) {
     auto x = real;
     if (scale >= -38 && scale <= 38) {
       x *= Derived::powers_of_ten()[scale + 38];
     } else {
       x *= std::pow(static_cast<Real>(10), static_cast<Real>(scale));
     }
     x = std::nearbyint(x);
     const auto max_abs = Derived::powers_of_ten()[precision + 38];
     if (x <= -max_abs || x >= max_abs) {
       return Status::Invalid("Cannot convert ", real,
                              " to Decimal128(precision = ", precision,
                              ", scale = ", scale, "): overflow");
     }
     // Extract high and low bits
     const auto high = std::floor(std::ldexp(x, -64));
     const auto low = x - std::ldexp(high, 64);

     DCHECK_GE(high, -9.223372036854776e+18);  // -2**63
     DCHECK_LT(high, 9.223372036854776e+18);   // 2**63
     DCHECK_GE(low, 0);
     DCHECK_LT(low, 1.8446744073709552e+19);  // 2**64
     return Decimal128(static_cast<int64_t>(high), static_cast<uint64_t>(low));
   }

   static Result<Decimal128> FromReal(Real x, int32_t precision, int32_t scale) {
     DCHECK_GT(precision, 0);
     DCHECK_LE(precision, 38);

     if (!std::isfinite(x)) {
       return Status::Invalid("Cannot convert ", x, " to Decimal128");
     }
     if (x < 0) {
       ARROW_ASSIGN_OR_RAISE(auto dec, FromPositiveReal(-x, precision, scale));
       return dec.Negate();
     } else {
       // Includes negative zero
       return FromPositiveReal(x, precision, scale);
     }
   }

   static Real ToRealPositive(const Decimal128& decimal, int32_t scale) {
     Real x = static_cast<Real>(decimal.high_bits()) * Derived::two_to_64();
     x += static_cast<Real>(decimal.low_bits());
     if (scale >= -38 && scale <= 38) {
       x *= Derived::powers_of_ten()[-scale + 38];
     } else {
       x *= std::pow(static_cast<Real>(10), static_cast<Real>(-scale));
     }
     return x;
   }

   static Real ToReal(Decimal128 decimal, int32_t scale) {
     if (decimal.high_bits() < 0) {
       // Convert the absolute value to avoid precision loss
       decimal.Negate();
       return -ToRealPositive(decimal, scale);
     } else {
       return ToRealPositive(decimal, scale);
     }
   }
 };

 struct DecimalFloatConversion
     : public DecimalRealConversion<float, DecimalFloatConversion> {
   static constexpr const float* powers_of_ten() { return kFloatPowersOfTen; }

   static constexpr float two_to_64() { return 1.8446744e+19f; }
 };

 struct DecimalDoubleConversion
     : public DecimalRealConversion<double, DecimalDoubleConversion> {
   static constexpr const double* powers_of_ten() { return kDoublePowersOfTen; }

   static constexpr double two_to_64() { return 1.8446744073709552e+19; }
 };

 }  // namespace

 Result<Decimal128> Decimal128::FromReal(float x, int32_t precision, int32_t scale) {
   return DecimalFloatConversion::FromReal(x, precision, scale);
 }

 Result<Decimal128> Decimal128::FromReal(double x, int32_t precision, int32_t scale) {
   return DecimalDoubleConversion::FromReal(x, precision, scale);
 }

 float Decimal128::ToFloat(int32_t scale) const {
   return DecimalFloatConversion::ToReal(*this, scale);
 }

 double Decimal128::ToDouble(int32_t scale) const {
   return DecimalDoubleConversion::ToReal(*this, scale);
 }

 template <size_t n>
 static void AppendLittleEndianArrayToString(const std::array<uint64_t, n>& array,
                                             std::string* result) {
   const auto most_significant_non_zero =
       find_if(array.rbegin(), array.rend(), [](uint64_t v) { return v != 0; });
   if (most_significant_non_zero == array.rend()) {
     result->push_back('0');
     return;
   }

   size_t most_significant_elem_idx = &*most_significant_non_zero - array.data();
   std::array<uint64_t, n> copy = array;
   constexpr uint32_t k1e9 = 1000000000U;
   constexpr size_t kNumBits = n * 64;
   // Segments will contain the array split into groups that map to decimal digits,
   // in little endian order. Each segment will hold at most 9 decimal digits.
   // For example, if the input represents 9876543210123456789, then segments will be
   // [123456789, 876543210, 9].
   // The max number of segments needed = ceil(kNumBits * log(2) / log(1e9))
   // = ceil(kNumBits / 29.897352854) <= ceil(kNumBits / 29).
   std::array<uint32_t, (kNumBits + 28) / 29> segments;
   size_t num_segments = 0;
   uint64_t* most_significant_elem = &copy[most_significant_elem_idx];
   do {
     // Compute remainder = copy % 1e9 and copy = copy / 1e9.
     uint32_t remainder = 0;
     uint64_t* elem = most_significant_elem;
     do {
       // Compute dividend = (remainder << 32) | *elem  (a virtual 96-bit integer);
       // *elem = dividend / 1e9;
       // remainder = dividend % 1e9.
       uint32_t hi = static_cast<uint32_t>(*elem >> 32);
       uint32_t lo = static_cast<uint32_t>(*elem & BitUtil::LeastSignificantBitMask(32));
       uint64_t dividend_hi = (static_cast<uint64_t>(remainder) << 32) | hi;
       uint64_t quotient_hi = dividend_hi / k1e9;
       remainder = static_cast<uint32_t>(dividend_hi % k1e9);
       uint64_t dividend_lo = (static_cast<uint64_t>(remainder) << 32) | lo;
       uint64_t quotient_lo = dividend_lo / k1e9;
       remainder = static_cast<uint32_t>(dividend_lo % k1e9);
       *elem = (quotient_hi << 32) | quotient_lo;
     } while (elem-- != copy.data());

     segments[num_segments++] = remainder;
   } while (*most_significant_elem != 0 || most_significant_elem-- != copy.data());

   size_t old_size = result->size();
   size_t new_size = old_size + num_segments * 9;
   result->resize(new_size, '0');
   char* output = &result->at(old_size);
   const uint32_t* segment = &segments[num_segments - 1];
   internal::StringFormatter<UInt32Type> format;
   // First segment is formatted as-is.
   format(*segment, [&output](util::string_view formatted) {
     memcpy(output, formatted.data(), formatted.size());
     output += formatted.size();
   });
   while (segment != segments.data()) {
     --segment;
     // Right-pad formatted segment such that e.g. 123 is formatted as "000000123".
     output += 9;
     format(*segment, [output](util::string_view formatted) {
       memcpy(output - formatted.size(), formatted.data(), formatted.size());
     });
   }
   result->resize(output - result->data());
 }

 std::string Decimal128::ToIntegerString() const {
   std::string result;
   if (high_bits() < 0) {
     result.push_back('-');
     Decimal128 abs = *this;
     abs.Negate();
     AppendLittleEndianArrayToString<2>(
         {abs.low_bits(), static_cast<uint64_t>(abs.high_bits())}, &result);
   } else {
     AppendLittleEndianArrayToString<2>({low_bits(), static_cast<uint64_t>(high_bits())},
                                        &result);
   }
   return result;
 }

 Decimal128::operator int64_t() const {
   DCHECK(high_bits() == 0 || high_bits() == -1)
       << "Trying to cast a Decimal128 greater than the value range of a "
          "int64_t. high_bits_ must be equal to 0 or -1, got: "
       << high_bits();
   return static_cast<int64_t>(low_bits());
 }

 static void AdjustIntegerStringWithScale(int32_t scale, std::string* str) {
   if (scale == 0) {
     return;
   }
   DCHECK(str != nullptr);
   DCHECK(!str->empty());
   const bool is_negative = str->front() == '-';
   const auto is_negative_offset = static_cast<int32_t>(is_negative);
   const auto len = static_cast<int32_t>(str->size());
   const int32_t num_digits = len - is_negative_offset;
   const int32_t adjusted_exponent = num_digits - 1 - scale;

   /// Note that the -6 is taken from the Java BigDecimal documentation.
   if (scale < 0 || adjusted_exponent < -6) {
     // Example 1:
     // Precondition: *str = "123", is_negative_offset = 0, num_digits = 3, scale = -2,
     //               adjusted_exponent = 4
     // After inserting decimal point: *str = "1.23"
     // After appending exponent: *str = "1.23E+4"
     // Example 2:
     // Precondition: *str = "-123", is_negative_offset = 1, num_digits = 3, scale = 9,
     //               adjusted_exponent = -7
     // After inserting decimal point: *str = "-1.23"
     // After appending exponent: *str = "-1.23E-7"
     str->insert(str->begin() + 1 + is_negative_offset, '.');
     str->push_back('E');
     if (adjusted_exponent >= 0) {
       str->push_back('+');
     }
     internal::StringFormatter<Int32Type> format;
     format(adjusted_exponent, [str](util::string_view formatted) {
       str->append(formatted.data(), formatted.size());
     });
     return;
   }

   if (num_digits > scale) {
     const auto n = static_cast<size_t>(len - scale);
     // Example 1:
     // Precondition: *str = "123", len = num_digits = 3, scale = 1, n = 2
     // After inserting decimal point: *str = "12.3"
     // Example 2:
     // Precondition: *str = "-123", len = 4, num_digits = 3, scale = 1, n = 3
     // After inserting decimal point: *str = "-12.3"
     str->insert(str->begin() + n, '.');
     return;
   }

   // Example 1:
   // Precondition: *str = "123", is_negative_offset = 0, num_digits = 3, scale = 4
   // After insert: *str = "000123"
   // After setting decimal point: *str = "0.0123"
   // Example 2:
   // Precondition: *str = "-123", is_negative_offset = 1, num_digits = 3, scale = 4
   // After insert: *str = "-000123"
   // After setting decimal point: *str = "-0.0123"
   str->insert(is_negative_offset, scale - num_digits + 2, '0');
   str->at(is_negative_offset + 1) = '.';
 }

 std::string Decimal128::ToString(int32_t scale) const {
   std::string str(ToIntegerString());
   AdjustIntegerStringWithScale(scale, &str);
   return str;
 }

 // Iterates over input and for each group of kInt64DecimalDigits multiple out by
 // the appropriate power of 10 necessary to add source parsed as uint64 and
 // then adds the parsed value of source.
 static inline void ShiftAndAdd(const util::string_view& input, uint64_t out[],
                                size_t out_size) {
   for (size_t posn = 0; posn < input.size();) {
     const size_t group_size = std::min(kInt64DecimalDigits, input.size() - posn);
     const uint64_t multiple = kUInt64PowersOfTen[group_size];
     uint64_t chunk = 0;
     ARROW_CHECK(
         internal::ParseValue<UInt64Type>(input.data() + posn, group_size, &chunk));

     for (size_t i = 0; i < out_size; ++i) {
       uint128_t tmp = out[i];
       tmp *= multiple;
       tmp += chunk;
       out[i] = static_cast<uint64_t>(tmp & 0xFFFFFFFFFFFFFFFFULL);
       chunk = static_cast<uint64_t>(tmp >> 64);
     }
     posn += group_size;
   }
 }

 namespace {

 struct DecimalComponents {
   util::string_view whole_digits;
   util::string_view fractional_digits;
   int32_t exponent = 0;
   char sign = 0;
   bool has_exponent = false;
 };

 inline bool IsSign(char c) { return c == '-' || c == '+'; }

 inline bool IsDot(char c) { return c == '.'; }

 inline bool IsDigit(char c) { return c >= '0' && c <= '9'; }

 inline bool StartsExponent(char c) { return c == 'e' || c == 'E'; }

 inline size_t ParseDigitsRun(const char* s, size_t start, size_t size,
                              util::string_view* out) {
   size_t pos;
   for (pos = start; pos < size; ++pos) {
     if (!IsDigit(s[pos])) {
       break;
     }
   }
   *out = util::string_view(s + start, pos - start);
   return pos;
 }

 bool ParseDecimalComponents(const char* s, size_t size, DecimalComponents* out) {
   size_t pos = 0;

   if (size == 0) {
     return false;
   }
   // Sign of the number
   if (IsSign(s[pos])) {
     out->sign = *(s + pos);
     ++pos;
   }
   // First run of digits
   pos = ParseDigitsRun(s, pos, size, &out->whole_digits);
   if (pos == size) {
     return !out->whole_digits.empty();
   }
   // Optional dot (if given in fractional form)
   bool has_dot = IsDot(s[pos]);
   if (has_dot) {
     // Second run of digits
     ++pos;
     pos = ParseDigitsRun(s, pos, size, &out->fractional_digits);
   }
   if (out->whole_digits.empty() && out->fractional_digits.empty()) {
     // Need at least some digits (whole or fractional)
     return false;
   }
   if (pos == size) {
     return true;
   }
   // Optional exponent
   if (StartsExponent(s[pos])) {
     ++pos;
     if (pos != size && s[pos] == '+') {
       ++pos;
     }
     out->has_exponent = true;
     return internal::ParseValue<Int32Type>(s + pos, size - pos, &(out->exponent));
   }
   return pos == size;
 }

 inline Status ToArrowStatus(DecimalStatus dstatus, int num_bits) {
   switch (dstatus) {
     case DecimalStatus::kSuccess:
       return Status::OK();

     case DecimalStatus::kDivideByZero:
       return Status::Invalid("Division by 0 in Decimal", num_bits);

     case DecimalStatus::kOverflow:
       return Status::Invalid("Overflow occurred during Decimal", num_bits, " operation.");

     case DecimalStatus::kRescaleDataLoss:
       return Status::Invalid("Rescaling Decimal", num_bits,
                              " value would cause data loss");
   }
   return Status::OK();
 }

 }  // namespace

 Status Decimal128::FromString(const util::string_view& s, Decimal128* out,
                               int32_t* precision, int32_t* scale) {
   if (s.empty()) {
     return Status::Invalid("Empty string cannot be converted to decimal");
   }

   DecimalComponents dec;
   if (!ParseDecimalComponents(s.data(), s.size(), &dec)) {
     return Status::Invalid("The string '", s, "' is not a valid decimal number");
   }

   // Count number of significant digits (without leading zeros)
   size_t first_non_zero = dec.whole_digits.find_first_not_of('0');
   size_t significant_digits = dec.fractional_digits.size();
   if (first_non_zero != std::string::npos) {
     significant_digits += dec.whole_digits.size() - first_non_zero;
   }
   int32_t parsed_precision = static_cast<int32_t>(significant_digits);

   int32_t parsed_scale = 0;
   if (dec.has_exponent) {
     auto adjusted_exponent = dec.exponent;
     auto len = static_cast<int32_t>(significant_digits);
     parsed_scale = -adjusted_exponent + len - 1;
   } else {
     parsed_scale = static_cast<int32_t>(dec.fractional_digits.size());
   }

   if (out != nullptr) {
     std::array<uint64_t, 2> little_endian_array = {0, 0};
     ShiftAndAdd(dec.whole_digits, little_endian_array.data(), little_endian_array.size());
     ShiftAndAdd(dec.fractional_digits, little_endian_array.data(),
                 little_endian_array.size());
     *out =
         Decimal128(static_cast<int64_t>(little_endian_array[1]), little_endian_array[0]);
     if (parsed_scale < 0) {
       *out *= GetScaleMultiplier(-parsed_scale);
     }

     if (dec.sign == '-') {
       out->Negate();
     }
   }

   if (parsed_scale < 0) {
     parsed_precision -= parsed_scale;
     parsed_scale = 0;
   }

   if (precision != nullptr) {
     *precision = parsed_precision;
   }
   if (scale != nullptr) {
     *scale = parsed_scale;
   }

   return Status::OK();
 }

 Status Decimal128::FromString(const std::string& s, Decimal128* out, int32_t* precision,
                               int32_t* scale) {
   return FromString(util::string_view(s), out, precision, scale);
 }

 Status Decimal128::FromString(const char* s, Decimal128* out, int32_t* precision,
                               int32_t* scale) {
   return FromString(util::string_view(s), out, precision, scale);
 }

 Result<Decimal128> Decimal128::FromString(const util::string_view& s) {
   Decimal128 out;
   RETURN_NOT_OK(FromString(s, &out, nullptr, nullptr));
   return std::move(out);
 }

 Result<Decimal128> Decimal128::FromString(const std::string& s) {
   return FromString(util::string_view(s));
 }

 Result<Decimal128> Decimal128::FromString(const char* s) {
   return FromString(util::string_view(s));
 }

 // Helper function used by Decimal128::FromBigEndian
 static inline uint64_t UInt64FromBigEndian(const uint8_t* bytes, int32_t length) {
   // We don't bounds check the length here because this is called by
   // FromBigEndian that has a Decimal128 as its out parameters and
   // that function is already checking the length of the bytes and only
   // passes lengths between zero and eight.
   uint64_t result = 0;
   // Using memcpy instead of special casing for length
   // and doing the conversion in 16, 32 parts, which could
   // possibly create unaligned memory access on certain platforms
   memcpy(reinterpret_cast<uint8_t*>(&result) + 8 - length, bytes, length);
   return ::arrow::BitUtil::FromBigEndian(result);
 }

 Result<Decimal128> Decimal128::FromBigEndian(const uint8_t* bytes, int32_t length) {
   static constexpr int32_t kMinDecimalBytes = 1;
   static constexpr int32_t kMaxDecimalBytes = 16;

   int64_t high, low;

   if (ARROW_PREDICT_FALSE(length < kMinDecimalBytes || length > kMaxDecimalBytes)) {
     return Status::Invalid("Length of byte array passed to Decimal128::FromBigEndian ",
                            "was ", length, ", but must be between ", kMinDecimalBytes,
                            " and ", kMaxDecimalBytes);
   }

   // Bytes are coming in big-endian, so the first byte is the MSB and therefore holds the
   // sign bit.
   const bool is_negative = static_cast<int8_t>(bytes[0]) < 0;

   // 1. Extract the high bytes
   // Stop byte of the high bytes
   const int32_t high_bits_offset = std::max(0, length - 8);
   const auto high_bits = UInt64FromBigEndian(bytes, high_bits_offset);

   if (high_bits_offset == 8) {
     // Avoid undefined shift by 64 below
     high = high_bits;
   } else {
     high = -1 * (is_negative && length < kMaxDecimalBytes);
     // Shift left enough bits to make room for the incoming int64_t
     high = SafeLeftShift(high, high_bits_offset * CHAR_BIT);
     // Preserve the upper bits by inplace OR-ing the int64_t
     high |= high_bits;
   }

   // 2. Extract the low bytes
   // Stop byte of the low bytes
   const int32_t low_bits_offset = std::min(length, 8);
   const auto low_bits =
       UInt64FromBigEndian(bytes + high_bits_offset, length - high_bits_offset);

   if (low_bits_offset == 8) {
     // Avoid undefined shift by 64 below
     low = low_bits;
   } else {
     // Sign extend the low bits if necessary
     low = -1 * (is_negative && length < 8);
     // Shift left enough bits to make room for the incoming int64_t
     low = SafeLeftShift(low, low_bits_offset * CHAR_BIT);
     // Preserve the upper bits by inplace OR-ing the int64_t
     low |= low_bits;
   }

   return Decimal128(high, static_cast<uint64_t>(low));
 }

 Status Decimal128::ToArrowStatus(DecimalStatus dstatus) const {
   return arrow::ToArrowStatus(dstatus, 128);
 }

 std::ostream& operator<<(std::ostream& os, const Decimal128& decimal) {
   os << decimal.ToIntegerString();
   return os;
 }

 Decimal256::Decimal256(const std::string& str) : Decimal256() {
   *this = Decimal256::FromString(str).ValueOrDie();
 }

 std::string Decimal256::ToIntegerString() const {
   std::string result;
   if (static_cast<int64_t>(little_endian_array()[3]) < 0) {
     result.push_back('-');
     Decimal256 abs = *this;
     abs.Negate();
     AppendLittleEndianArrayToString(abs.little_endian_array(), &result);
   } else {
     AppendLittleEndianArrayToString(little_endian_array(), &result);
   }
   return result;
 }

 std::string Decimal256::ToString(int32_t scale) const {
   std::string str(ToIntegerString());
   AdjustIntegerStringWithScale(scale, &str);
   return str;
 }

 Status Decimal256::FromString(const util::string_view& s, Decimal256* out,
                               int32_t* precision, int32_t* scale) {
   if (s.empty()) {
     return Status::Invalid("Empty string cannot be converted to decimal");
   }

   DecimalComponents dec;
   if (!ParseDecimalComponents(s.data(), s.size(), &dec)) {
     return Status::Invalid("The string '", s, "' is not a valid decimal number");
   }

   // Count number of significant digits (without leading zeros)
   size_t first_non_zero = dec.whole_digits.find_first_not_of('0');
   size_t significant_digits = dec.fractional_digits.size();
   if (first_non_zero != std::string::npos) {
     significant_digits += dec.whole_digits.size() - first_non_zero;
   }

   if (precision != nullptr) {
     *precision = static_cast<int32_t>(significant_digits);
   }

   if (scale != nullptr) {
     if (dec.has_exponent) {
       auto adjusted_exponent = dec.exponent;
       auto len = static_cast<int32_t>(significant_digits);
       *scale = -adjusted_exponent + len - 1;
     } else {
       *scale = static_cast<int32_t>(dec.fractional_digits.size());
     }
   }

   if (out != nullptr) {
     std::array<uint64_t, 4> little_endian_array = {0, 0, 0, 0};
     ShiftAndAdd(dec.whole_digits, little_endian_array.data(), little_endian_array.size());
     ShiftAndAdd(dec.fractional_digits, little_endian_array.data(),
                 little_endian_array.size());
     *out = Decimal256(little_endian_array);

     if (dec.sign == '-') {
       out->Negate();
     }
   }

   return Status::OK();
 }

 Status Decimal256::FromString(const std::string& s, Decimal256* out, int32_t* precision,
                               int32_t* scale) {
   return FromString(util::string_view(s), out, precision, scale);
 }

 Status Decimal256::FromString(const char* s, Decimal256* out, int32_t* precision,
                               int32_t* scale) {
   return FromString(util::string_view(s), out, precision, scale);
 }

 Result<Decimal256> Decimal256::FromString(const util::string_view& s) {
   Decimal256 out;
   RETURN_NOT_OK(FromString(s, &out, nullptr, nullptr));
   return std::move(out);
 }

 Result<Decimal256> Decimal256::FromString(const std::string& s) {
   return FromString(util::string_view(s));
 }

 Result<Decimal256> Decimal256::FromString(const char* s) {
   return FromString(util::string_view(s));
 }

 Result<Decimal256> Decimal256::FromBigEndian(const uint8_t* bytes, int32_t length) {
   static constexpr int32_t kMinDecimalBytes = 1;
   static constexpr int32_t kMaxDecimalBytes = 32;

   std::array<uint64_t, 4> little_endian_array;

   if (ARROW_PREDICT_FALSE(length < kMinDecimalBytes || length > kMaxDecimalBytes)) {
     return Status::Invalid("Length of byte array passed to Decimal128::FromBigEndian ",
                            "was ", length, ", but must be between ", kMinDecimalBytes,
                            " and ", kMaxDecimalBytes);
   }

   // Bytes are coming in big-endian, so the first byte is the MSB and therefore holds the
   // sign bit.
   const bool is_negative = static_cast<int8_t>(bytes[0]) < 0;

   for (int word_idx = 0; word_idx < 4; word_idx++) {
     const int32_t word_length = std::min(length, static_cast<int32_t>(sizeof(uint64_t)));

     if (word_length == 8) {
       // Full words can be assigned as is (and are UB with the shift below).
       little_endian_array[word_idx] =
           UInt64FromBigEndian(bytes + length - word_length, word_length);
     } else {
       // Sign extend the word its if necessary
       uint64_t word = -1 * is_negative;
       if (length > 0) {
         // Incorporate the actual values if present.
         // Shift left enough bits to make room for the incoming int64_t
         word = SafeLeftShift(word, word_length * CHAR_BIT);
         // Preserve the upper bits by inplace OR-ing the int64_t
         word |= UInt64FromBigEndian(bytes + length - word_length, word_length);
       }
       little_endian_array[word_idx] = word;
     }
     // Move on to the next word.
     length -= word_length;
   }

   return Decimal256(little_endian_array);
 }

 Status Decimal256::ToArrowStatus(DecimalStatus dstatus) const {
   return arrow::ToArrowStatus(dstatus, 256);
 }

 namespace {

 template <typename Real, typename Derived>
 struct Decimal256RealConversion {
   static Result<Decimal256> FromPositiveReal(Real real, int32_t precision,
                                              int32_t scale) {
     auto x = real;
     if (scale >= -76 && scale <= 76) {
       x *= Derived::powers_of_ten()[scale + 76];
     } else {
       x *= std::pow(static_cast<Real>(10), static_cast<Real>(scale));
     }
     x = std::nearbyint(x);
     const auto max_abs = Derived::powers_of_ten()[precision + 76];
     if (x >= max_abs) {
       return Status::Invalid("Cannot convert ", real,
                              " to Decimal256(precision = ", precision,
                              ", scale = ", scale, "): overflow");
     }
     // Extract parts
     const auto part3 = std::floor(std::ldexp(x, -192));
     x -= std::ldexp(part3, 192);
     const auto part2 = std::floor(std::ldexp(x, -128));
     x -= std::ldexp(part2, 128);
     const auto part1 = std::floor(std::ldexp(x, -64));
     x -= std::ldexp(part1, 64);
     const auto part0 = x;

     DCHECK_GE(part3, 0);
     DCHECK_LT(part3, 1.8446744073709552e+19);  // 2**64
     DCHECK_GE(part2, 0);
     DCHECK_LT(part2, 1.8446744073709552e+19);  // 2**64
     DCHECK_GE(part1, 0);
     DCHECK_LT(part1, 1.8446744073709552e+19);  // 2**64
     DCHECK_GE(part0, 0);
     DCHECK_LT(part0, 1.8446744073709552e+19);  // 2**64
     return Decimal256(std::array<uint64_t, 4>{
         static_cast<uint64_t>(part0), static_cast<uint64_t>(part1),
         static_cast<uint64_t>(part2), static_cast<uint64_t>(part3)});
   }

   static Result<Decimal256> FromReal(Real x, int32_t precision, int32_t scale) {
     DCHECK_GT(precision, 0);
     DCHECK_LE(precision, 76);

     if (!std::isfinite(x)) {
       return Status::Invalid("Cannot convert ", x, " to Decimal256");
     }
     if (x < 0) {
       ARROW_ASSIGN_OR_RAISE(auto dec, FromPositiveReal(-x, precision, scale));
       return dec.Negate();
     } else {
       // Includes negative zero
       return FromPositiveReal(x, precision, scale);
     }
   }

   static Real ToRealPositive(const Decimal256& decimal, int32_t scale) {
     DCHECK_GE(decimal, 0);
     Real x = 0;
     const auto& parts = decimal.little_endian_array();
     x += Derived::two_to_192(static_cast<Real>(parts[3]));
     x += Derived::two_to_128(static_cast<Real>(parts[2]));
     x += Derived::two_to_64(static_cast<Real>(parts[1]));
     x += static_cast<Real>(parts[0]);
     if (scale >= -76 && scale <= 76) {
       x *= Derived::powers_of_ten()[-scale + 76];
     } else {
       x *= std::pow(static_cast<Real>(10), static_cast<Real>(-scale));
     }
     return x;
   }

   static Real ToReal(Decimal256 decimal, int32_t scale) {
     if (decimal.little_endian_array()[3] & (1ULL << 63)) {
       // Convert the absolute value to avoid precision loss
       decimal.Negate();
       return -ToRealPositive(decimal, scale);
     } else {
       return ToRealPositive(decimal, scale);
     }
   }
 };

 struct Decimal256FloatConversion
     : public Decimal256RealConversion<float, Decimal256FloatConversion> {
   static constexpr const float* powers_of_ten() { return kFloatPowersOfTen76; }

   static float two_to_64(float x) { return x * 1.8446744e+19f; }
   static float two_to_128(float x) { return x == 0 ? 0 : INFINITY; }
   static float two_to_192(float x) { return x == 0 ? 0 : INFINITY; }
 };

 struct Decimal256DoubleConversion
     : public Decimal256RealConversion<double, Decimal256DoubleConversion> {
   static constexpr const double* powers_of_ten() { return kDoublePowersOfTen76; }

   static double two_to_64(double x) { return x * 1.8446744073709552e+19; }
   static double two_to_128(double x) { return x * 3.402823669209385e+38; }
   static double two_to_192(double x) { return x * 6.277101735386681e+57; }
 };

 }  // namespace

 Result<Decimal256> Decimal256::FromReal(float x, int32_t precision, int32_t scale) {
   return Decimal256FloatConversion::FromReal(x, precision, scale);
 }

 Result<Decimal256> Decimal256::FromReal(double x, int32_t precision, int32_t scale) {
   return Decimal256DoubleConversion::FromReal(x, precision, scale);
 }

 float Decimal256::ToFloat(int32_t scale) const {
   return Decimal256FloatConversion::ToReal(*this, scale);
 }

 double Decimal256::ToDouble(int32_t scale) const {
   return Decimal256DoubleConversion::ToReal(*this, scale);
 }

 std::ostream& operator<<(std::ostream& os, const Decimal256& decimal) {
   os << decimal.ToIntegerString();
   return os;
 }
 }  // namespace arrow