| /* |
| * Copyright 2024-present Alibaba Inc. |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| #include "paimon/data/decimal.h" |
| |
| #include <algorithm> |
| #include <cmath> |
| #include <cstdlib> |
| |
| #include "arrow/api.h" |
| #include "arrow/scalar.h" |
| #include "arrow/util/decimal.h" |
| #include "paimon/io/byte_order.h" |
| #include "paimon/memory/bytes.h" |
| |
| namespace paimon { |
| const int64_t Decimal::POWERS_OF_TEN[MAX_COMPACT_PRECISION + 1] = {1, |
| 10, |
| 100, |
| 1000, |
| 10000, |
| 100000, |
| 1000000l, |
| 10000000l, |
| 100000000l, |
| 1000000000l, |
| 10000000000l, |
| 100000000000l, |
| 1000000000000l, |
| 10000000000000l, |
| 100000000000000l, |
| 1000000000000000l, |
| 10000000000000000l, |
| 100000000000000000l, |
| 1000000000000000000l}; |
| const Decimal::int128_t Decimal::INT128_MAXIMUM_VALUE = |
| static_cast<Decimal::int128_t>(0x7fffffffffffffff) << 64 | 0xffffffffffffffff; |
| const Decimal::int128_t Decimal::INT128_MINIMUM_VALUE = |
| static_cast<Decimal::int128_t>(0x8000000000000000) << 64; |
| |
| std::string Decimal::ToString() const { |
| auto type = arrow::decimal128(Precision(), Scale()); |
| arrow::Decimal128Scalar scalar(arrow::Decimal128(HighBits(), LowBits()), type); |
| return scalar.ToString(); |
| } |
| |
| std::vector<char> Decimal::ToUnscaledBytes() const { |
| bool positive = value_ >= 0; |
| int32_t valid_bytes = 0; |
| if (positive) { |
| int32_t leading_zero_bytes = count_leading_zero_bytes(value_); |
| valid_bytes = sizeof(value_) - leading_zero_bytes; |
| } else { |
| int32_t leading_all_ones_bytes = count_leading_all_ones_bytes(value_); |
| valid_bytes = sizeof(value_) - leading_all_ones_bytes; |
| } |
| if (valid_bytes == 0) { |
| // if value_ == 0, return one byte with 0; |
| // if value_ == 0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF, return one byte with 0xFF |
| valid_bytes = 1; |
| } else { |
| // java BigInteger use highest significant bit to determine if the number is positive or |
| // negative, e.g., a positive BigInteger 0xFF, will return two bytes [0x00, 0xFF] |
| bool highest_significant_bit = |
| (static_cast<Decimal::uint128_t>(value_) >> ((valid_bytes - 1) * 8)) & 0x80; |
| if ((positive && highest_significant_bit) || (!positive && !highest_significant_bit)) { |
| valid_bytes += 1; |
| } |
| } |
| std::vector<char> bytes(valid_bytes); |
| memcpy(bytes.data(), &value_, valid_bytes); |
| if (SystemByteOrder() == ByteOrder::PAIMON_LITTLE_ENDIAN) { |
| std::reverse(bytes.data(), bytes.data() + bytes.size()); |
| } |
| return bytes; |
| } |
| |
| Decimal Decimal::FromUnscaledBytes(int32_t precision, int32_t scale, Bytes* bytes) { |
| if (SystemByteOrder() == ByteOrder::PAIMON_LITTLE_ENDIAN) { |
| std::reverse(bytes->data(), bytes->data() + bytes->size()); |
| } |
| Decimal::int128_t value = 0; |
| for (size_t i = 0; i < bytes->size(); ++i) { |
| value |= static_cast<Decimal::uint128_t>(static_cast<uint8_t>((*bytes)[i])) << (8 * i); |
| } |
| // for negative |
| if ((*bytes)[bytes->size() - 1] & 0x80) { |
| for (size_t i = bytes->size(); i < sizeof(Decimal::int128_t); ++i) { |
| value |= static_cast<Decimal::uint128_t>(0xFFull) << (8 * i); |
| } |
| } |
| return Decimal(precision, scale, value); |
| } |
| |
| int32_t Decimal::clz_u128(uint128_t u) { |
| uint64_t hi = u >> 64; |
| uint64_t lo = u; |
| int32_t retval[3] = {__builtin_clzll(hi), __builtin_clzll(lo) + 64, 128}; |
| int32_t idx = !hi + ((!lo) & (!hi)); |
| return retval[idx]; |
| } |
| |
| int32_t Decimal::count_leading_zero_bytes(uint128_t u) { |
| if (u == 0) { |
| return sizeof(u); |
| } |
| int32_t leading_zeros = clz_u128(u); |
| return leading_zeros / 8; |
| } |
| |
| int32_t Decimal::count_leading_all_ones_bytes(uint128_t u) { |
| if (u == 0) { |
| return 0; |
| } |
| int32_t count = 0; |
| for (int32_t i = sizeof(uint128_t) - 1; i >= 0; i--) { |
| if (((u >> (i * 8)) & 0xFF) == 0xFF) { |
| count++; |
| } else { |
| break; |
| } |
| } |
| return count; |
| } |
| |
| Decimal::int128_t Decimal::DownScaleInt128(Decimal::int128_t value, int32_t scale) { |
| while (scale > 0) { |
| int32_t step = std::min(std::abs(scale), MAX_COMPACT_PRECISION); |
| value /= POWERS_OF_TEN[step]; |
| scale -= step; |
| } |
| return value; |
| } |
| |
| Decimal::int128_t Decimal::ScaleInt128(Decimal::int128_t value, int32_t scale, bool* overflow) { |
| *overflow = false; |
| while (scale > 0) { |
| int32_t step = std::min(scale, MAX_COMPACT_PRECISION); |
| if (value > 0 && INT128_MAXIMUM_VALUE / POWERS_OF_TEN[step] < value) { |
| *overflow = true; |
| return INT128_MAXIMUM_VALUE; |
| } else if (value < 0 && INT128_MINIMUM_VALUE / POWERS_OF_TEN[step] > value) { |
| *overflow = true; |
| return INT128_MINIMUM_VALUE; |
| } |
| |
| value *= POWERS_OF_TEN[step]; |
| scale -= step; |
| } |
| return value; |
| } |
| |
| int32_t Decimal::CompareTo(const Decimal& other) const { |
| auto l_value = value_; |
| auto l_scale = scale_; |
| auto r_value = other.value_; |
| auto r_scale = other.scale_; |
| |
| bool l_positive = l_value >= 0; |
| bool r_positive = r_value >= 0; |
| if (l_positive && !r_positive) { |
| return 1; |
| } else if (!l_positive && r_positive) { |
| return -1; |
| } |
| |
| // compare integral parts |
| Decimal::int128_t l_integral = DownScaleInt128(l_value, l_scale); |
| Decimal::int128_t r_integral = DownScaleInt128(r_value, r_scale); |
| |
| if (l_integral < r_integral) { |
| return -1; |
| } else if (l_integral > r_integral) { |
| return 1; |
| } |
| |
| // integral parts are equal, continue comparing fractional parts |
| // unnecessary to check overflow here because the scaled number will not |
| // exceed original ones |
| bool overflow = false, positive = l_value >= 0; |
| l_value -= ScaleInt128(l_integral, l_scale, &overflow); |
| r_value -= ScaleInt128(r_integral, r_scale, &overflow); |
| |
| int32_t diff = l_scale - r_scale; |
| if (diff > 0) { |
| r_value = ScaleInt128(r_value, diff, &overflow); |
| if (overflow) { |
| return positive ? -1 : 1; |
| } |
| } else { |
| l_value = ScaleInt128(l_value, -diff, &overflow); |
| if (overflow) { |
| return positive ? 1 : -1; |
| } |
| } |
| |
| if (l_value < r_value) { |
| return -1; |
| } else if (l_value > r_value) { |
| return 1; |
| } else { |
| return 0; |
| } |
| } |
| |
| } // namespace paimon |