blob: 6c1d94f192ace6d6a0161e08fbdff3943fb707fb [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <endian.h>
#include <cstdint>
#include <functional>
#include "common/compiler-util.h"
#include "common/logging.h"
#include "runtime/multi-precision.h"
#include "runtime/types.h"
#include "util/arithmetic-util.h"
#include "util/bit-util.h"
#include "util/decimal-constants.h"
namespace impala {
class DecimalUtil {
public:
// Helper function that checks for multiplication overflow. We only check for overflow
// if may_overflow is false.
template <typename T>
static T SafeMultiply(T a, T b, bool may_overflow) {
T result = ArithmeticUtil::AsUnsigned<std::multiplies>(a, b);
DCHECK(may_overflow || a == 0 || result / a == b);
return result;
}
static int256_t SafeMultiply(int256_t a, int256_t b, bool may_overflow) {
int256_t result = a * b;
DCHECK(may_overflow || a == 0 || result / a == b);
return result;
}
template<typename T>
static T MultiplyByScale(const T& v, int scale, bool may_overflow) {
T multiplier = GetScaleMultiplier<T>(scale);
DCHECK(multiplier > 0);
return SafeMultiply(v, multiplier, may_overflow);
}
template<typename T>
static T GetScaleMultiplier(int scale) {
DCHECK_GE(scale, 0);
T result = 1;
for (int i = 0; i < scale; ++i) {
// Verify that the result of multiplication does not overflow.
// TODO: This is not an ideal way to check for overflow because if T is signed, the
// behavior is undefined in case of overflow. Replace this with a better overflow
// check.
DCHECK_GE(result * 10, result);
result *= 10;
}
return result;
}
/// Helper function to scale down values by 10^delta_scale, truncating if
/// round is false or rounding otherwise.
template<typename T>
static inline T ScaleDownAndRound(T value, int delta_scale, bool round) {
DCHECK_GT(delta_scale, 0);
T divisor = DecimalUtil::GetScaleMultiplier<T>(delta_scale);
if (divisor > 0) {
DCHECK(divisor % 2 == 0);
T result = value / divisor;
if (round) {
T remainder = value % divisor;
// In general, shifting down the multiplier is not safe, but we know
// here that it is a multiple of two.
if (abs(remainder) >= (divisor >> 1)) {
// Bias at zero must be corrected by sign of dividend.
result += Sign(value);
}
}
return result;
} else {
DCHECK(divisor == -1);
return 0;
}
}
/// Write decimals as big endian (byte comparable) in fixed_len_size bytes.
template<typename T>
static inline void EncodeToFixedLenByteArray(
uint8_t* buffer, int fixed_len_size, const T& v) {
DCHECK_GT(fixed_len_size, 0);
DCHECK_LE(fixed_len_size, sizeof(T));
#if __BYTE_ORDER == __LITTLE_ENDIAN
BitUtil::ByteSwap(buffer, &v, fixed_len_size);
#else
memcpy(buffer, &v + sizeof(T) - fixed_len_size, fixed_len_size);
#endif
#ifndef NDEBUG
#if __BYTE_ORDER == __LITTLE_ENDIAN
const int8_t* skipped_bytes_start = reinterpret_cast<const int8_t*>(&v) +
fixed_len_size;
#else
const int8_t* skipped_bytes_start = reinterpret_cast<const int8_t*>(&v);
#endif
// On debug, verify that the skipped bytes are what we expect.
for (int i = 0; i < sizeof(T) - fixed_len_size; ++i) {
DCHECK_EQ(skipped_bytes_start[i], v.value() < 0 ? -1 : 0);
}
#endif
}
template <typename T>
static inline void DecodeFromFixedLenByteArray(
const uint8_t* RESTRICT buffer, int fixed_len_size, T* RESTRICT v) {
DCHECK_GT(fixed_len_size, 0);
#if __BYTE_ORDER != __LITTLE_ENDIAN
static_assert(false, "Byte order must be little-endian");
#endif
if (LIKELY(sizeof(T) >= fixed_len_size)) {
// We need to sign extend val. For example, if the original value was
// -1, the original bytes were -1,-1,-1,-1. If we only wrote out 1 byte, ByteSwap()
// only fills in the first byte of 'v' - the least significant byte. We initialize
// the value to either 0 or -1 to ensure that the result is correctly sign-extended.
// GCC can optimize this code to an instruction pair of movsbq and sarq with no
// branches.
int8_t most_significant_byte = reinterpret_cast<const int8_t*>(buffer)[0];
v->set_value(most_significant_byte >= 0 ? 0 : -1);
BitUtil::ByteSwap(reinterpret_cast<int8_t*>(v), buffer, fixed_len_size);
} else {
// If the destination 'v' is smaller than the input, discard the upper bytes.
// These bytes should only contain the sign-extended value if they were encoded
// correctly, so are not required.
int bytes_to_discard = fixed_len_size - sizeof(T);
BitUtil::ByteSwap(reinterpret_cast<int8_t*>(v),
buffer + bytes_to_discard, sizeof(T));
}
}
// Used to skip checking overflow in multiply of decimal values.
static inline int Clz(const int128_t& v) {
// GCC leaves __builtin_clz undefined for zero.
uint64_t high = static_cast<uint64_t>(v >> 64);
if (high != 0) return __builtin_clzll(high);
uint64_t low = static_cast<uint64_t>(v);
if (low != 0) return 64 + __builtin_clzll(low);
return 128;
}
};
template <>
inline int32_t DecimalUtil::GetScaleMultiplier<int32_t>(int scale) {
DCHECK_GE(scale, 0);
static const int32_t values[] = {
1,
10,
100,
1000,
10000,
100000,
1000000,
10000000,
100000000,
1000000000};
DCHECK_GE(sizeof(values) / sizeof(int32_t), ColumnType::MAX_DECIMAL4_PRECISION);
if (LIKELY(scale < 10)) return values[scale];
return -1; // Overflow
}
template <>
inline int64_t DecimalUtil::GetScaleMultiplier<int64_t>(int scale) {
DCHECK_GE(scale, 0);
static const int64_t values[] = {
1ll,
10ll,
100ll,
1000ll,
10000ll,
100000ll,
1000000ll,
10000000ll,
100000000ll,
1000000000ll,
10000000000ll,
100000000000ll,
1000000000000ll,
10000000000000ll,
100000000000000ll,
1000000000000000ll,
10000000000000000ll,
100000000000000000ll,
1000000000000000000ll};
DCHECK_GE(sizeof(values) / sizeof(int64_t), ColumnType::MAX_DECIMAL8_PRECISION);
if (LIKELY(scale < 19)) return values[scale];
return -1; // Overflow
}
template <>
inline int128_t DecimalUtil::GetScaleMultiplier<int128_t>(int scale) {
DCHECK_GE(scale, 0);
static const int128_t values[] = {
static_cast<int128_t>(1ll),
static_cast<int128_t>(10ll),
static_cast<int128_t>(100ll),
static_cast<int128_t>(1000ll),
static_cast<int128_t>(10000ll),
static_cast<int128_t>(100000ll),
static_cast<int128_t>(1000000ll),
static_cast<int128_t>(10000000ll),
static_cast<int128_t>(100000000ll),
static_cast<int128_t>(1000000000ll),
static_cast<int128_t>(10000000000ll),
static_cast<int128_t>(100000000000ll),
static_cast<int128_t>(1000000000000ll),
static_cast<int128_t>(10000000000000ll),
static_cast<int128_t>(100000000000000ll),
static_cast<int128_t>(1000000000000000ll),
static_cast<int128_t>(10000000000000000ll),
static_cast<int128_t>(100000000000000000ll),
static_cast<int128_t>(1000000000000000000ll),
static_cast<int128_t>(1000000000000000000ll) * 10ll,
static_cast<int128_t>(1000000000000000000ll) * 100ll,
static_cast<int128_t>(1000000000000000000ll) * 1000ll,
static_cast<int128_t>(1000000000000000000ll) * 10000ll,
static_cast<int128_t>(1000000000000000000ll) * 100000ll,
static_cast<int128_t>(1000000000000000000ll) * 1000000ll,
static_cast<int128_t>(1000000000000000000ll) * 10000000ll,
static_cast<int128_t>(1000000000000000000ll) * 100000000ll,
static_cast<int128_t>(1000000000000000000ll) * 1000000000ll,
static_cast<int128_t>(1000000000000000000ll) * 10000000000ll,
static_cast<int128_t>(1000000000000000000ll) * 100000000000ll,
static_cast<int128_t>(1000000000000000000ll) * 1000000000000ll,
static_cast<int128_t>(1000000000000000000ll) * 10000000000000ll,
static_cast<int128_t>(1000000000000000000ll) * 100000000000000ll,
static_cast<int128_t>(1000000000000000000ll) * 1000000000000000ll,
static_cast<int128_t>(1000000000000000000ll) * 10000000000000000ll,
static_cast<int128_t>(1000000000000000000ll) * 100000000000000000ll,
static_cast<int128_t>(1000000000000000000ll) * 100000000000000000ll * 10ll,
static_cast<int128_t>(1000000000000000000ll) * 100000000000000000ll * 100ll,
static_cast<int128_t>(1000000000000000000ll) * 100000000000000000ll * 1000ll};
DCHECK_GE(sizeof(values) / sizeof(int128_t), ColumnType::MAX_PRECISION);
if (LIKELY(scale < 39)) return values[scale];
return -1; // Overflow
}
}