blob: 7aefd1ab9cd7c2fa58074b962ed21cd458a48617 [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#include <algorithm>
#include <array>
#include <climits>
#include <cmath>
#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <iomanip>
#include <limits>
#include <ostream>
#include <sstream>
#include <string>
#include "arrow/status.h"
#include "arrow/util/decimal.h"
#include "arrow/util/endian.h"
#include "arrow/util/formatting.h"
#include "arrow/util/int128_internal.h"
#include "arrow/util/int_util_internal.h"
#include "arrow/util/logging.h"
#include "arrow/util/macros.h"
#include "arrow/util/value_parsing.h"
namespace arrow {
using internal::SafeLeftShift;
using internal::SafeSignedAdd;
using internal::uint128_t;
Decimal128::Decimal128(const std::string& str) : Decimal128() {
*this = Decimal128::FromString(str).ValueOrDie();
}
static constexpr auto kInt64DecimalDigits =
static_cast<size_t>(std::numeric_limits<int64_t>::digits10);
static constexpr uint64_t kUInt64PowersOfTen[kInt64DecimalDigits + 1] = {
// clang-format off
1ULL,
10ULL,
100ULL,
1000ULL,
10000ULL,
100000ULL,
1000000ULL,
10000000ULL,
100000000ULL,
1000000000ULL,
10000000000ULL,
100000000000ULL,
1000000000000ULL,
10000000000000ULL,
100000000000000ULL,
1000000000000000ULL,
10000000000000000ULL,
100000000000000000ULL,
1000000000000000000ULL
// clang-format on
};
static constexpr float kFloatPowersOfTen[2 * 38 + 1] = {
1e-38f, 1e-37f, 1e-36f, 1e-35f, 1e-34f, 1e-33f, 1e-32f, 1e-31f, 1e-30f, 1e-29f,
1e-28f, 1e-27f, 1e-26f, 1e-25f, 1e-24f, 1e-23f, 1e-22f, 1e-21f, 1e-20f, 1e-19f,
1e-18f, 1e-17f, 1e-16f, 1e-15f, 1e-14f, 1e-13f, 1e-12f, 1e-11f, 1e-10f, 1e-9f,
1e-8f, 1e-7f, 1e-6f, 1e-5f, 1e-4f, 1e-3f, 1e-2f, 1e-1f, 1e0f, 1e1f,
1e2f, 1e3f, 1e4f, 1e5f, 1e6f, 1e7f, 1e8f, 1e9f, 1e10f, 1e11f,
1e12f, 1e13f, 1e14f, 1e15f, 1e16f, 1e17f, 1e18f, 1e19f, 1e20f, 1e21f,
1e22f, 1e23f, 1e24f, 1e25f, 1e26f, 1e27f, 1e28f, 1e29f, 1e30f, 1e31f,
1e32f, 1e33f, 1e34f, 1e35f, 1e36f, 1e37f, 1e38f};
static constexpr double kDoublePowersOfTen[2 * 38 + 1] = {
1e-38, 1e-37, 1e-36, 1e-35, 1e-34, 1e-33, 1e-32, 1e-31, 1e-30, 1e-29, 1e-28,
1e-27, 1e-26, 1e-25, 1e-24, 1e-23, 1e-22, 1e-21, 1e-20, 1e-19, 1e-18, 1e-17,
1e-16, 1e-15, 1e-14, 1e-13, 1e-12, 1e-11, 1e-10, 1e-9, 1e-8, 1e-7, 1e-6,
1e-5, 1e-4, 1e-3, 1e-2, 1e-1, 1e0, 1e1, 1e2, 1e3, 1e4, 1e5,
1e6, 1e7, 1e8, 1e9, 1e10, 1e11, 1e12, 1e13, 1e14, 1e15, 1e16,
1e17, 1e18, 1e19, 1e20, 1e21, 1e22, 1e23, 1e24, 1e25, 1e26, 1e27,
1e28, 1e29, 1e30, 1e31, 1e32, 1e33, 1e34, 1e35, 1e36, 1e37, 1e38};
// On the Windows R toolchain, INFINITY is double type instead of float
static constexpr float kFloatInf = std::numeric_limits<float>::infinity();
static constexpr float kFloatPowersOfTen76[2 * 76 + 1] = {
0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 1e-45f, 1e-44f, 1e-43f, 1e-42f,
1e-41f, 1e-40f, 1e-39f, 1e-38f, 1e-37f, 1e-36f, 1e-35f,
1e-34f, 1e-33f, 1e-32f, 1e-31f, 1e-30f, 1e-29f, 1e-28f,
1e-27f, 1e-26f, 1e-25f, 1e-24f, 1e-23f, 1e-22f, 1e-21f,
1e-20f, 1e-19f, 1e-18f, 1e-17f, 1e-16f, 1e-15f, 1e-14f,
1e-13f, 1e-12f, 1e-11f, 1e-10f, 1e-9f, 1e-8f, 1e-7f,
1e-6f, 1e-5f, 1e-4f, 1e-3f, 1e-2f, 1e-1f, 1e0f,
1e1f, 1e2f, 1e3f, 1e4f, 1e5f, 1e6f, 1e7f,
1e8f, 1e9f, 1e10f, 1e11f, 1e12f, 1e13f, 1e14f,
1e15f, 1e16f, 1e17f, 1e18f, 1e19f, 1e20f, 1e21f,
1e22f, 1e23f, 1e24f, 1e25f, 1e26f, 1e27f, 1e28f,
1e29f, 1e30f, 1e31f, 1e32f, 1e33f, 1e34f, 1e35f,
1e36f, 1e37f, 1e38f, kFloatInf, kFloatInf, kFloatInf, kFloatInf,
kFloatInf, kFloatInf, kFloatInf, kFloatInf, kFloatInf, kFloatInf, kFloatInf,
kFloatInf, kFloatInf, kFloatInf, kFloatInf, kFloatInf, kFloatInf, kFloatInf,
kFloatInf, kFloatInf, kFloatInf, kFloatInf, kFloatInf, kFloatInf, kFloatInf,
kFloatInf, kFloatInf, kFloatInf, kFloatInf, kFloatInf, kFloatInf, kFloatInf,
kFloatInf, kFloatInf, kFloatInf, kFloatInf, kFloatInf, kFloatInf};
static constexpr double kDoublePowersOfTen76[2 * 76 + 1] = {
1e-76, 1e-75, 1e-74, 1e-73, 1e-72, 1e-71, 1e-70, 1e-69, 1e-68, 1e-67, 1e-66, 1e-65,
1e-64, 1e-63, 1e-62, 1e-61, 1e-60, 1e-59, 1e-58, 1e-57, 1e-56, 1e-55, 1e-54, 1e-53,
1e-52, 1e-51, 1e-50, 1e-49, 1e-48, 1e-47, 1e-46, 1e-45, 1e-44, 1e-43, 1e-42, 1e-41,
1e-40, 1e-39, 1e-38, 1e-37, 1e-36, 1e-35, 1e-34, 1e-33, 1e-32, 1e-31, 1e-30, 1e-29,
1e-28, 1e-27, 1e-26, 1e-25, 1e-24, 1e-23, 1e-22, 1e-21, 1e-20, 1e-19, 1e-18, 1e-17,
1e-16, 1e-15, 1e-14, 1e-13, 1e-12, 1e-11, 1e-10, 1e-9, 1e-8, 1e-7, 1e-6, 1e-5,
1e-4, 1e-3, 1e-2, 1e-1, 1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7,
1e8, 1e9, 1e10, 1e11, 1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18, 1e19,
1e20, 1e21, 1e22, 1e23, 1e24, 1e25, 1e26, 1e27, 1e28, 1e29, 1e30, 1e31,
1e32, 1e33, 1e34, 1e35, 1e36, 1e37, 1e38, 1e39, 1e40, 1e41, 1e42, 1e43,
1e44, 1e45, 1e46, 1e47, 1e48, 1e49, 1e50, 1e51, 1e52, 1e53, 1e54, 1e55,
1e56, 1e57, 1e58, 1e59, 1e60, 1e61, 1e62, 1e63, 1e64, 1e65, 1e66, 1e67,
1e68, 1e69, 1e70, 1e71, 1e72, 1e73, 1e74, 1e75, 1e76};
namespace {
template <typename Real, typename Derived>
struct DecimalRealConversion {
static Result<Decimal128> FromPositiveReal(Real real, int32_t precision,
int32_t scale) {
auto x = real;
if (scale >= -38 && scale <= 38) {
x *= Derived::powers_of_ten()[scale + 38];
} else {
x *= std::pow(static_cast<Real>(10), static_cast<Real>(scale));
}
x = std::nearbyint(x);
const auto max_abs = Derived::powers_of_ten()[precision + 38];
if (x <= -max_abs || x >= max_abs) {
return Status::Invalid("Cannot convert ", real,
" to Decimal128(precision = ", precision,
", scale = ", scale, "): overflow");
}
// Extract high and low bits
const auto high = std::floor(std::ldexp(x, -64));
const auto low = x - std::ldexp(high, 64);
DCHECK_GE(high, -9.223372036854776e+18); // -2**63
DCHECK_LT(high, 9.223372036854776e+18); // 2**63
DCHECK_GE(low, 0);
DCHECK_LT(low, 1.8446744073709552e+19); // 2**64
return Decimal128(static_cast<int64_t>(high), static_cast<uint64_t>(low));
}
static Result<Decimal128> FromReal(Real x, int32_t precision, int32_t scale) {
DCHECK_GT(precision, 0);
DCHECK_LE(precision, 38);
if (!std::isfinite(x)) {
return Status::Invalid("Cannot convert ", x, " to Decimal128");
}
if (x < 0) {
ARROW_ASSIGN_OR_RAISE(auto dec, FromPositiveReal(-x, precision, scale));
return dec.Negate();
} else {
// Includes negative zero
return FromPositiveReal(x, precision, scale);
}
}
static Real ToRealPositive(const Decimal128& decimal, int32_t scale) {
Real x = static_cast<Real>(decimal.high_bits()) * Derived::two_to_64();
x += static_cast<Real>(decimal.low_bits());
if (scale >= -38 && scale <= 38) {
x *= Derived::powers_of_ten()[-scale + 38];
} else {
x *= std::pow(static_cast<Real>(10), static_cast<Real>(-scale));
}
return x;
}
static Real ToReal(Decimal128 decimal, int32_t scale) {
if (decimal.high_bits() < 0) {
// Convert the absolute value to avoid precision loss
decimal.Negate();
return -ToRealPositive(decimal, scale);
} else {
return ToRealPositive(decimal, scale);
}
}
};
struct DecimalFloatConversion
: public DecimalRealConversion<float, DecimalFloatConversion> {
static constexpr const float* powers_of_ten() { return kFloatPowersOfTen; }
static constexpr float two_to_64() { return 1.8446744e+19f; }
};
struct DecimalDoubleConversion
: public DecimalRealConversion<double, DecimalDoubleConversion> {
static constexpr const double* powers_of_ten() { return kDoublePowersOfTen; }
static constexpr double two_to_64() { return 1.8446744073709552e+19; }
};
} // namespace
Result<Decimal128> Decimal128::FromReal(float x, int32_t precision, int32_t scale) {
return DecimalFloatConversion::FromReal(x, precision, scale);
}
Result<Decimal128> Decimal128::FromReal(double x, int32_t precision, int32_t scale) {
return DecimalDoubleConversion::FromReal(x, precision, scale);
}
float Decimal128::ToFloat(int32_t scale) const {
return DecimalFloatConversion::ToReal(*this, scale);
}
double Decimal128::ToDouble(int32_t scale) const {
return DecimalDoubleConversion::ToReal(*this, scale);
}
template <size_t n>
static void AppendLittleEndianArrayToString(const std::array<uint64_t, n>& array,
std::string* result) {
const auto most_significant_non_zero =
find_if(array.rbegin(), array.rend(), [](uint64_t v) { return v != 0; });
if (most_significant_non_zero == array.rend()) {
result->push_back('0');
return;
}
size_t most_significant_elem_idx = &*most_significant_non_zero - array.data();
std::array<uint64_t, n> copy = array;
constexpr uint32_t k1e9 = 1000000000U;
constexpr size_t kNumBits = n * 64;
// Segments will contain the array split into groups that map to decimal digits,
// in little endian order. Each segment will hold at most 9 decimal digits.
// For example, if the input represents 9876543210123456789, then segments will be
// [123456789, 876543210, 9].
// The max number of segments needed = ceil(kNumBits * log(2) / log(1e9))
// = ceil(kNumBits / 29.897352854) <= ceil(kNumBits / 29).
std::array<uint32_t, (kNumBits + 28) / 29> segments;
size_t num_segments = 0;
uint64_t* most_significant_elem = &copy[most_significant_elem_idx];
do {
// Compute remainder = copy % 1e9 and copy = copy / 1e9.
uint32_t remainder = 0;
uint64_t* elem = most_significant_elem;
do {
// Compute dividend = (remainder << 32) | *elem (a virtual 96-bit integer);
// *elem = dividend / 1e9;
// remainder = dividend % 1e9.
uint32_t hi = static_cast<uint32_t>(*elem >> 32);
uint32_t lo = static_cast<uint32_t>(*elem & BitUtil::LeastSignificantBitMask(32));
uint64_t dividend_hi = (static_cast<uint64_t>(remainder) << 32) | hi;
uint64_t quotient_hi = dividend_hi / k1e9;
remainder = static_cast<uint32_t>(dividend_hi % k1e9);
uint64_t dividend_lo = (static_cast<uint64_t>(remainder) << 32) | lo;
uint64_t quotient_lo = dividend_lo / k1e9;
remainder = static_cast<uint32_t>(dividend_lo % k1e9);
*elem = (quotient_hi << 32) | quotient_lo;
} while (elem-- != copy.data());
segments[num_segments++] = remainder;
} while (*most_significant_elem != 0 || most_significant_elem-- != copy.data());
size_t old_size = result->size();
size_t new_size = old_size + num_segments * 9;
result->resize(new_size, '0');
char* output = &result->at(old_size);
const uint32_t* segment = &segments[num_segments - 1];
internal::StringFormatter<UInt32Type> format;
// First segment is formatted as-is.
format(*segment, [&output](util::string_view formatted) {
memcpy(output, formatted.data(), formatted.size());
output += formatted.size();
});
while (segment != segments.data()) {
--segment;
// Right-pad formatted segment such that e.g. 123 is formatted as "000000123".
output += 9;
format(*segment, [output](util::string_view formatted) {
memcpy(output - formatted.size(), formatted.data(), formatted.size());
});
}
result->resize(output - result->data());
}
std::string Decimal128::ToIntegerString() const {
std::string result;
if (high_bits() < 0) {
result.push_back('-');
Decimal128 abs = *this;
abs.Negate();
AppendLittleEndianArrayToString<2>(
{abs.low_bits(), static_cast<uint64_t>(abs.high_bits())}, &result);
} else {
AppendLittleEndianArrayToString<2>({low_bits(), static_cast<uint64_t>(high_bits())},
&result);
}
return result;
}
Decimal128::operator int64_t() const {
DCHECK(high_bits() == 0 || high_bits() == -1)
<< "Trying to cast a Decimal128 greater than the value range of a "
"int64_t. high_bits_ must be equal to 0 or -1, got: "
<< high_bits();
return static_cast<int64_t>(low_bits());
}
static void AdjustIntegerStringWithScale(int32_t scale, std::string* str) {
if (scale == 0) {
return;
}
DCHECK(str != nullptr);
DCHECK(!str->empty());
const bool is_negative = str->front() == '-';
const auto is_negative_offset = static_cast<int32_t>(is_negative);
const auto len = static_cast<int32_t>(str->size());
const int32_t num_digits = len - is_negative_offset;
const int32_t adjusted_exponent = num_digits - 1 - scale;
/// Note that the -6 is taken from the Java BigDecimal documentation.
if (scale < 0 || adjusted_exponent < -6) {
// Example 1:
// Precondition: *str = "123", is_negative_offset = 0, num_digits = 3, scale = -2,
// adjusted_exponent = 4
// After inserting decimal point: *str = "1.23"
// After appending exponent: *str = "1.23E+4"
// Example 2:
// Precondition: *str = "-123", is_negative_offset = 1, num_digits = 3, scale = 9,
// adjusted_exponent = -7
// After inserting decimal point: *str = "-1.23"
// After appending exponent: *str = "-1.23E-7"
str->insert(str->begin() + 1 + is_negative_offset, '.');
str->push_back('E');
if (adjusted_exponent >= 0) {
str->push_back('+');
}
internal::StringFormatter<Int32Type> format;
format(adjusted_exponent, [str](util::string_view formatted) {
str->append(formatted.data(), formatted.size());
});
return;
}
if (num_digits > scale) {
const auto n = static_cast<size_t>(len - scale);
// Example 1:
// Precondition: *str = "123", len = num_digits = 3, scale = 1, n = 2
// After inserting decimal point: *str = "12.3"
// Example 2:
// Precondition: *str = "-123", len = 4, num_digits = 3, scale = 1, n = 3
// After inserting decimal point: *str = "-12.3"
str->insert(str->begin() + n, '.');
return;
}
// Example 1:
// Precondition: *str = "123", is_negative_offset = 0, num_digits = 3, scale = 4
// After insert: *str = "000123"
// After setting decimal point: *str = "0.0123"
// Example 2:
// Precondition: *str = "-123", is_negative_offset = 1, num_digits = 3, scale = 4
// After insert: *str = "-000123"
// After setting decimal point: *str = "-0.0123"
str->insert(is_negative_offset, scale - num_digits + 2, '0');
str->at(is_negative_offset + 1) = '.';
}
std::string Decimal128::ToString(int32_t scale) const {
std::string str(ToIntegerString());
AdjustIntegerStringWithScale(scale, &str);
return str;
}
// Iterates over input and for each group of kInt64DecimalDigits multiple out by
// the appropriate power of 10 necessary to add source parsed as uint64 and
// then adds the parsed value of source.
static inline void ShiftAndAdd(const util::string_view& input, uint64_t out[],
size_t out_size) {
for (size_t posn = 0; posn < input.size();) {
const size_t group_size = std::min(kInt64DecimalDigits, input.size() - posn);
const uint64_t multiple = kUInt64PowersOfTen[group_size];
uint64_t chunk = 0;
ARROW_CHECK(
internal::ParseValue<UInt64Type>(input.data() + posn, group_size, &chunk));
for (size_t i = 0; i < out_size; ++i) {
uint128_t tmp = out[i];
tmp *= multiple;
tmp += chunk;
out[i] = static_cast<uint64_t>(tmp & 0xFFFFFFFFFFFFFFFFULL);
chunk = static_cast<uint64_t>(tmp >> 64);
}
posn += group_size;
}
}
namespace {
struct DecimalComponents {
util::string_view whole_digits;
util::string_view fractional_digits;
int32_t exponent = 0;
char sign = 0;
bool has_exponent = false;
};
inline bool IsSign(char c) { return c == '-' || c == '+'; }
inline bool IsDot(char c) { return c == '.'; }
inline bool IsDigit(char c) { return c >= '0' && c <= '9'; }
inline bool StartsExponent(char c) { return c == 'e' || c == 'E'; }
inline size_t ParseDigitsRun(const char* s, size_t start, size_t size,
util::string_view* out) {
size_t pos;
for (pos = start; pos < size; ++pos) {
if (!IsDigit(s[pos])) {
break;
}
}
*out = util::string_view(s + start, pos - start);
return pos;
}
bool ParseDecimalComponents(const char* s, size_t size, DecimalComponents* out) {
size_t pos = 0;
if (size == 0) {
return false;
}
// Sign of the number
if (IsSign(s[pos])) {
out->sign = *(s + pos);
++pos;
}
// First run of digits
pos = ParseDigitsRun(s, pos, size, &out->whole_digits);
if (pos == size) {
return !out->whole_digits.empty();
}
// Optional dot (if given in fractional form)
bool has_dot = IsDot(s[pos]);
if (has_dot) {
// Second run of digits
++pos;
pos = ParseDigitsRun(s, pos, size, &out->fractional_digits);
}
if (out->whole_digits.empty() && out->fractional_digits.empty()) {
// Need at least some digits (whole or fractional)
return false;
}
if (pos == size) {
return true;
}
// Optional exponent
if (StartsExponent(s[pos])) {
++pos;
if (pos != size && s[pos] == '+') {
++pos;
}
out->has_exponent = true;
return internal::ParseValue<Int32Type>(s + pos, size - pos, &(out->exponent));
}
return pos == size;
}
inline Status ToArrowStatus(DecimalStatus dstatus, int num_bits) {
switch (dstatus) {
case DecimalStatus::kSuccess:
return Status::OK();
case DecimalStatus::kDivideByZero:
return Status::Invalid("Division by 0 in Decimal", num_bits);
case DecimalStatus::kOverflow:
return Status::Invalid("Overflow occurred during Decimal", num_bits, " operation.");
case DecimalStatus::kRescaleDataLoss:
return Status::Invalid("Rescaling Decimal", num_bits,
" value would cause data loss");
}
return Status::OK();
}
} // namespace
Status Decimal128::FromString(const util::string_view& s, Decimal128* out,
int32_t* precision, int32_t* scale) {
if (s.empty()) {
return Status::Invalid("Empty string cannot be converted to decimal");
}
DecimalComponents dec;
if (!ParseDecimalComponents(s.data(), s.size(), &dec)) {
return Status::Invalid("The string '", s, "' is not a valid decimal number");
}
// Count number of significant digits (without leading zeros)
size_t first_non_zero = dec.whole_digits.find_first_not_of('0');
size_t significant_digits = dec.fractional_digits.size();
if (first_non_zero != std::string::npos) {
significant_digits += dec.whole_digits.size() - first_non_zero;
}
int32_t parsed_precision = static_cast<int32_t>(significant_digits);
int32_t parsed_scale = 0;
if (dec.has_exponent) {
auto adjusted_exponent = dec.exponent;
auto len = static_cast<int32_t>(significant_digits);
parsed_scale = -adjusted_exponent + len - 1;
} else {
parsed_scale = static_cast<int32_t>(dec.fractional_digits.size());
}
if (out != nullptr) {
std::array<uint64_t, 2> little_endian_array = {0, 0};
ShiftAndAdd(dec.whole_digits, little_endian_array.data(), little_endian_array.size());
ShiftAndAdd(dec.fractional_digits, little_endian_array.data(),
little_endian_array.size());
*out =
Decimal128(static_cast<int64_t>(little_endian_array[1]), little_endian_array[0]);
if (parsed_scale < 0) {
*out *= GetScaleMultiplier(-parsed_scale);
}
if (dec.sign == '-') {
out->Negate();
}
}
if (parsed_scale < 0) {
parsed_precision -= parsed_scale;
parsed_scale = 0;
}
if (precision != nullptr) {
*precision = parsed_precision;
}
if (scale != nullptr) {
*scale = parsed_scale;
}
return Status::OK();
}
Status Decimal128::FromString(const std::string& s, Decimal128* out, int32_t* precision,
int32_t* scale) {
return FromString(util::string_view(s), out, precision, scale);
}
Status Decimal128::FromString(const char* s, Decimal128* out, int32_t* precision,
int32_t* scale) {
return FromString(util::string_view(s), out, precision, scale);
}
Result<Decimal128> Decimal128::FromString(const util::string_view& s) {
Decimal128 out;
RETURN_NOT_OK(FromString(s, &out, nullptr, nullptr));
return std::move(out);
}
Result<Decimal128> Decimal128::FromString(const std::string& s) {
return FromString(util::string_view(s));
}
Result<Decimal128> Decimal128::FromString(const char* s) {
return FromString(util::string_view(s));
}
// Helper function used by Decimal128::FromBigEndian
static inline uint64_t UInt64FromBigEndian(const uint8_t* bytes, int32_t length) {
// We don't bounds check the length here because this is called by
// FromBigEndian that has a Decimal128 as its out parameters and
// that function is already checking the length of the bytes and only
// passes lengths between zero and eight.
uint64_t result = 0;
// Using memcpy instead of special casing for length
// and doing the conversion in 16, 32 parts, which could
// possibly create unaligned memory access on certain platforms
memcpy(reinterpret_cast<uint8_t*>(&result) + 8 - length, bytes, length);
return ::arrow::BitUtil::FromBigEndian(result);
}
Result<Decimal128> Decimal128::FromBigEndian(const uint8_t* bytes, int32_t length) {
static constexpr int32_t kMinDecimalBytes = 1;
static constexpr int32_t kMaxDecimalBytes = 16;
int64_t high, low;
if (ARROW_PREDICT_FALSE(length < kMinDecimalBytes || length > kMaxDecimalBytes)) {
return Status::Invalid("Length of byte array passed to Decimal128::FromBigEndian ",
"was ", length, ", but must be between ", kMinDecimalBytes,
" and ", kMaxDecimalBytes);
}
// Bytes are coming in big-endian, so the first byte is the MSB and therefore holds the
// sign bit.
const bool is_negative = static_cast<int8_t>(bytes[0]) < 0;
// 1. Extract the high bytes
// Stop byte of the high bytes
const int32_t high_bits_offset = std::max(0, length - 8);
const auto high_bits = UInt64FromBigEndian(bytes, high_bits_offset);
if (high_bits_offset == 8) {
// Avoid undefined shift by 64 below
high = high_bits;
} else {
high = -1 * (is_negative && length < kMaxDecimalBytes);
// Shift left enough bits to make room for the incoming int64_t
high = SafeLeftShift(high, high_bits_offset * CHAR_BIT);
// Preserve the upper bits by inplace OR-ing the int64_t
high |= high_bits;
}
// 2. Extract the low bytes
// Stop byte of the low bytes
const int32_t low_bits_offset = std::min(length, 8);
const auto low_bits =
UInt64FromBigEndian(bytes + high_bits_offset, length - high_bits_offset);
if (low_bits_offset == 8) {
// Avoid undefined shift by 64 below
low = low_bits;
} else {
// Sign extend the low bits if necessary
low = -1 * (is_negative && length < 8);
// Shift left enough bits to make room for the incoming int64_t
low = SafeLeftShift(low, low_bits_offset * CHAR_BIT);
// Preserve the upper bits by inplace OR-ing the int64_t
low |= low_bits;
}
return Decimal128(high, static_cast<uint64_t>(low));
}
Status Decimal128::ToArrowStatus(DecimalStatus dstatus) const {
return arrow::ToArrowStatus(dstatus, 128);
}
std::ostream& operator<<(std::ostream& os, const Decimal128& decimal) {
os << decimal.ToIntegerString();
return os;
}
Decimal256::Decimal256(const std::string& str) : Decimal256() {
*this = Decimal256::FromString(str).ValueOrDie();
}
std::string Decimal256::ToIntegerString() const {
std::string result;
if (static_cast<int64_t>(little_endian_array()[3]) < 0) {
result.push_back('-');
Decimal256 abs = *this;
abs.Negate();
AppendLittleEndianArrayToString(abs.little_endian_array(), &result);
} else {
AppendLittleEndianArrayToString(little_endian_array(), &result);
}
return result;
}
std::string Decimal256::ToString(int32_t scale) const {
std::string str(ToIntegerString());
AdjustIntegerStringWithScale(scale, &str);
return str;
}
Status Decimal256::FromString(const util::string_view& s, Decimal256* out,
int32_t* precision, int32_t* scale) {
if (s.empty()) {
return Status::Invalid("Empty string cannot be converted to decimal");
}
DecimalComponents dec;
if (!ParseDecimalComponents(s.data(), s.size(), &dec)) {
return Status::Invalid("The string '", s, "' is not a valid decimal number");
}
// Count number of significant digits (without leading zeros)
size_t first_non_zero = dec.whole_digits.find_first_not_of('0');
size_t significant_digits = dec.fractional_digits.size();
if (first_non_zero != std::string::npos) {
significant_digits += dec.whole_digits.size() - first_non_zero;
}
if (precision != nullptr) {
*precision = static_cast<int32_t>(significant_digits);
}
if (scale != nullptr) {
if (dec.has_exponent) {
auto adjusted_exponent = dec.exponent;
auto len = static_cast<int32_t>(significant_digits);
*scale = -adjusted_exponent + len - 1;
} else {
*scale = static_cast<int32_t>(dec.fractional_digits.size());
}
}
if (out != nullptr) {
std::array<uint64_t, 4> little_endian_array = {0, 0, 0, 0};
ShiftAndAdd(dec.whole_digits, little_endian_array.data(), little_endian_array.size());
ShiftAndAdd(dec.fractional_digits, little_endian_array.data(),
little_endian_array.size());
*out = Decimal256(little_endian_array);
if (dec.sign == '-') {
out->Negate();
}
}
return Status::OK();
}
Status Decimal256::FromString(const std::string& s, Decimal256* out, int32_t* precision,
int32_t* scale) {
return FromString(util::string_view(s), out, precision, scale);
}
Status Decimal256::FromString(const char* s, Decimal256* out, int32_t* precision,
int32_t* scale) {
return FromString(util::string_view(s), out, precision, scale);
}
Result<Decimal256> Decimal256::FromString(const util::string_view& s) {
Decimal256 out;
RETURN_NOT_OK(FromString(s, &out, nullptr, nullptr));
return std::move(out);
}
Result<Decimal256> Decimal256::FromString(const std::string& s) {
return FromString(util::string_view(s));
}
Result<Decimal256> Decimal256::FromString(const char* s) {
return FromString(util::string_view(s));
}
Result<Decimal256> Decimal256::FromBigEndian(const uint8_t* bytes, int32_t length) {
static constexpr int32_t kMinDecimalBytes = 1;
static constexpr int32_t kMaxDecimalBytes = 32;
std::array<uint64_t, 4> little_endian_array;
if (ARROW_PREDICT_FALSE(length < kMinDecimalBytes || length > kMaxDecimalBytes)) {
return Status::Invalid("Length of byte array passed to Decimal128::FromBigEndian ",
"was ", length, ", but must be between ", kMinDecimalBytes,
" and ", kMaxDecimalBytes);
}
// Bytes are coming in big-endian, so the first byte is the MSB and therefore holds the
// sign bit.
const bool is_negative = static_cast<int8_t>(bytes[0]) < 0;
for (int word_idx = 0; word_idx < 4; word_idx++) {
const int32_t word_length = std::min(length, static_cast<int32_t>(sizeof(uint64_t)));
if (word_length == 8) {
// Full words can be assigned as is (and are UB with the shift below).
little_endian_array[word_idx] =
UInt64FromBigEndian(bytes + length - word_length, word_length);
} else {
// Sign extend the word its if necessary
uint64_t word = -1 * is_negative;
if (length > 0) {
// Incorporate the actual values if present.
// Shift left enough bits to make room for the incoming int64_t
word = SafeLeftShift(word, word_length * CHAR_BIT);
// Preserve the upper bits by inplace OR-ing the int64_t
word |= UInt64FromBigEndian(bytes + length - word_length, word_length);
}
little_endian_array[word_idx] = word;
}
// Move on to the next word.
length -= word_length;
}
return Decimal256(little_endian_array);
}
Status Decimal256::ToArrowStatus(DecimalStatus dstatus) const {
return arrow::ToArrowStatus(dstatus, 256);
}
namespace {
template <typename Real, typename Derived>
struct Decimal256RealConversion {
static Result<Decimal256> FromPositiveReal(Real real, int32_t precision,
int32_t scale) {
auto x = real;
if (scale >= -76 && scale <= 76) {
x *= Derived::powers_of_ten()[scale + 76];
} else {
x *= std::pow(static_cast<Real>(10), static_cast<Real>(scale));
}
x = std::nearbyint(x);
const auto max_abs = Derived::powers_of_ten()[precision + 76];
if (x >= max_abs) {
return Status::Invalid("Cannot convert ", real,
" to Decimal256(precision = ", precision,
", scale = ", scale, "): overflow");
}
// Extract parts
const auto part3 = std::floor(std::ldexp(x, -192));
x -= std::ldexp(part3, 192);
const auto part2 = std::floor(std::ldexp(x, -128));
x -= std::ldexp(part2, 128);
const auto part1 = std::floor(std::ldexp(x, -64));
x -= std::ldexp(part1, 64);
const auto part0 = x;
DCHECK_GE(part3, 0);
DCHECK_LT(part3, 1.8446744073709552e+19); // 2**64
DCHECK_GE(part2, 0);
DCHECK_LT(part2, 1.8446744073709552e+19); // 2**64
DCHECK_GE(part1, 0);
DCHECK_LT(part1, 1.8446744073709552e+19); // 2**64
DCHECK_GE(part0, 0);
DCHECK_LT(part0, 1.8446744073709552e+19); // 2**64
return Decimal256(std::array<uint64_t, 4>{
static_cast<uint64_t>(part0), static_cast<uint64_t>(part1),
static_cast<uint64_t>(part2), static_cast<uint64_t>(part3)});
}
static Result<Decimal256> FromReal(Real x, int32_t precision, int32_t scale) {
DCHECK_GT(precision, 0);
DCHECK_LE(precision, 76);
if (!std::isfinite(x)) {
return Status::Invalid("Cannot convert ", x, " to Decimal256");
}
if (x < 0) {
ARROW_ASSIGN_OR_RAISE(auto dec, FromPositiveReal(-x, precision, scale));
return dec.Negate();
} else {
// Includes negative zero
return FromPositiveReal(x, precision, scale);
}
}
static Real ToRealPositive(const Decimal256& decimal, int32_t scale) {
DCHECK_GE(decimal, 0);
Real x = 0;
const auto& parts = decimal.little_endian_array();
x += Derived::two_to_192(static_cast<Real>(parts[3]));
x += Derived::two_to_128(static_cast<Real>(parts[2]));
x += Derived::two_to_64(static_cast<Real>(parts[1]));
x += static_cast<Real>(parts[0]);
if (scale >= -76 && scale <= 76) {
x *= Derived::powers_of_ten()[-scale + 76];
} else {
x *= std::pow(static_cast<Real>(10), static_cast<Real>(-scale));
}
return x;
}
static Real ToReal(Decimal256 decimal, int32_t scale) {
if (decimal.little_endian_array()[3] & (1ULL << 63)) {
// Convert the absolute value to avoid precision loss
decimal.Negate();
return -ToRealPositive(decimal, scale);
} else {
return ToRealPositive(decimal, scale);
}
}
};
struct Decimal256FloatConversion
: public Decimal256RealConversion<float, Decimal256FloatConversion> {
static constexpr const float* powers_of_ten() { return kFloatPowersOfTen76; }
static float two_to_64(float x) { return x * 1.8446744e+19f; }
static float two_to_128(float x) { return x == 0 ? 0 : INFINITY; }
static float two_to_192(float x) { return x == 0 ? 0 : INFINITY; }
};
struct Decimal256DoubleConversion
: public Decimal256RealConversion<double, Decimal256DoubleConversion> {
static constexpr const double* powers_of_ten() { return kDoublePowersOfTen76; }
static double two_to_64(double x) { return x * 1.8446744073709552e+19; }
static double two_to_128(double x) { return x * 3.402823669209385e+38; }
static double two_to_192(double x) { return x * 6.277101735386681e+57; }
};
} // namespace
Result<Decimal256> Decimal256::FromReal(float x, int32_t precision, int32_t scale) {
return Decimal256FloatConversion::FromReal(x, precision, scale);
}
Result<Decimal256> Decimal256::FromReal(double x, int32_t precision, int32_t scale) {
return Decimal256DoubleConversion::FromReal(x, precision, scale);
}
float Decimal256::ToFloat(int32_t scale) const {
return Decimal256FloatConversion::ToReal(*this, scale);
}
double Decimal256::ToDouble(int32_t scale) const {
return Decimal256DoubleConversion::ToReal(*this, scale);
}
std::ostream& operator<<(std::ostream& os, const Decimal256& decimal) {
os << decimal.ToIntegerString();
return os;
}
} // namespace arrow