be/src/util/decimal-util.h - impala - Git at Google

 // Licensed to the Apache Software Foundation (ASF) under one
 // or more contributor license agreements.  See the NOTICE file
 // distributed with this work for additional information
 // regarding copyright ownership.  The ASF licenses this file
 // to you under the Apache License, Version 2.0 (the
 // "License"); you may not use this file except in compliance
 // with the License.  You may obtain a copy of the License at
 //
 //   http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing,
 // software distributed under the License is distributed on an
 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 // KIND, either express or implied.  See the License for the
 // specific language governing permissions and limitations
 // under the License.

 #pragma once

 #include <endian.h>

 #include <cstdint>
 #include <functional>

 #include "common/compiler-util.h"
 #include "common/logging.h"
 #include "runtime/multi-precision.h"
 #include "runtime/types.h"
 #include "util/arithmetic-util.h"
 #include "util/bit-util.h"
 #include "util/decimal-constants.h"

 namespace impala {

 class DecimalUtil {
  public:
   // Helper function that checks for multiplication overflow. We only check for overflow
   // if may_overflow is false.
   template <typename T>
   static T SafeMultiply(T a, T b, bool may_overflow) {
     T result = ArithmeticUtil::AsUnsigned<std::multiplies>(a, b);
     DCHECK(may_overflow || a == 0 || result / a == b);
     return result;
   }

   static int256_t SafeMultiply(int256_t a, int256_t b, bool may_overflow) {
     int256_t result = a * b;
     DCHECK(may_overflow || a == 0 || result / a == b);
     return result;
   }

   template<typename T>
   static T MultiplyByScale(const T& v, int scale, bool may_overflow) {
     T multiplier = GetScaleMultiplier<T>(scale);
     DCHECK(multiplier > 0);
     return SafeMultiply(v, multiplier, may_overflow);
   }

   template<typename T>
   static T GetScaleMultiplier(int scale) {
     DCHECK_GE(scale, 0);
     T result = 1;
     for (int i = 0; i < scale; ++i) {
       // Verify that the result of multiplication does not overflow.
       // TODO: This is not an ideal way to check for overflow because if T is signed, the
       // behavior is undefined in case of overflow. Replace this with a better overflow
       // check.
       DCHECK_GE(result * 10, result);
       result *= 10;
     }
     return result;
   }

   /// Helper function to scale down values by 10^delta_scale, truncating if
   /// round is false or rounding otherwise.
   template<typename T>
   static inline T ScaleDownAndRound(T value, int delta_scale, bool round) {
     DCHECK_GT(delta_scale, 0);
     T divisor = DecimalUtil::GetScaleMultiplier<T>(delta_scale);
     if (divisor > 0) {
       DCHECK(divisor % 2 == 0);
       T result = value / divisor;
       if (round) {
         T remainder = value % divisor;
         // In general, shifting down the multiplier is not safe, but we know
         // here that it is a multiple of two.
         if (abs(remainder) >= (divisor >> 1)) {
           // Bias at zero must be corrected by sign of dividend.
           result += Sign(value);
         }
       }
       return result;
     } else {
       DCHECK(divisor == -1);
       return 0;
     }
   }

   /// Write decimals as big endian (byte comparable) in fixed_len_size bytes.
   template<typename T>
   static inline void EncodeToFixedLenByteArray(
       uint8_t* buffer, int fixed_len_size, const T& v) {
     DCHECK_GT(fixed_len_size, 0);
     DCHECK_LE(fixed_len_size, sizeof(T));

 #if __BYTE_ORDER == __LITTLE_ENDIAN
     BitUtil::ByteSwap(buffer, &v, fixed_len_size);
 #else
     memcpy(buffer, &v + sizeof(T) - fixed_len_size, fixed_len_size);
 #endif

 #ifndef NDEBUG
 #if __BYTE_ORDER == __LITTLE_ENDIAN
     const int8_t* skipped_bytes_start = reinterpret_cast<const int8_t*>(&v) +
         fixed_len_size;
 #else
     const int8_t* skipped_bytes_start = reinterpret_cast<const int8_t*>(&v);
 #endif
     // On debug, verify that the skipped bytes are what we expect.
     for (int i = 0; i < sizeof(T) - fixed_len_size; ++i) {
       DCHECK_EQ(skipped_bytes_start[i], v.value() < 0 ? -1 : 0);
     }
 #endif
   }

   template <typename T>
   static inline void DecodeFromFixedLenByteArray(
       const uint8_t* RESTRICT buffer, int fixed_len_size, T* RESTRICT v) {
     DCHECK_GT(fixed_len_size, 0);
 #if __BYTE_ORDER != __LITTLE_ENDIAN
     static_assert(false, "Byte order must be little-endian");
 #endif
     if (LIKELY(sizeof(T) >= fixed_len_size)) {
       // We need to sign extend val. For example, if the original value was
       // -1, the original bytes were -1,-1,-1,-1. If we only wrote out 1 byte, ByteSwap()
       // only fills in the first byte of 'v' - the least significant byte. We initialize
       // the value to either 0 or -1 to ensure that the result is correctly sign-extended.

       // GCC can optimize this code to an instruction pair of movsbq and sarq with no
       // branches.
       int8_t most_significant_byte = reinterpret_cast<const int8_t*>(buffer)[0];
       v->set_value(most_significant_byte >= 0 ? 0 : -1);
       BitUtil::ByteSwap(reinterpret_cast<int8_t*>(v), buffer, fixed_len_size);
     } else {
       // If the destination 'v' is smaller than the input, discard the upper bytes.
       // These bytes should only contain the sign-extended value if they were encoded
       // correctly, so are not required.
       int bytes_to_discard = fixed_len_size - sizeof(T);
       BitUtil::ByteSwap(reinterpret_cast<int8_t*>(v),
           buffer + bytes_to_discard, sizeof(T));
     }
   }

   // Used to skip checking overflow in multiply of decimal values.
   static inline int Clz(const int128_t& v) {
     // GCC leaves __builtin_clz undefined for zero.
     uint64_t high = static_cast<uint64_t>(v >> 64);
     if (high != 0) return __builtin_clzll(high);
     uint64_t low = static_cast<uint64_t>(v);
     if (low != 0) return 64 + __builtin_clzll(low);
     return 128;
   }

 };

 template <>
 inline int32_t DecimalUtil::GetScaleMultiplier<int32_t>(int scale) {
   DCHECK_GE(scale, 0);
   static const int32_t values[] = {
       1,
       10,
       100,
       1000,
       10000,
       100000,
       1000000,
       10000000,
       100000000,
       1000000000};
   DCHECK_GE(sizeof(values) / sizeof(int32_t), ColumnType::MAX_DECIMAL4_PRECISION);
   if (LIKELY(scale < 10)) return values[scale];
   return -1;  // Overflow
 }

 template <>
 inline int64_t DecimalUtil::GetScaleMultiplier<int64_t>(int scale) {
   DCHECK_GE(scale, 0);
   static const int64_t values[] = {
       1ll,
       10ll,
       100ll,
       1000ll,
       10000ll,
       100000ll,
       1000000ll,
       10000000ll,
       100000000ll,
       1000000000ll,
       10000000000ll,
       100000000000ll,
       1000000000000ll,
       10000000000000ll,
       100000000000000ll,
       1000000000000000ll,
       10000000000000000ll,
       100000000000000000ll,
       1000000000000000000ll};
   DCHECK_GE(sizeof(values) / sizeof(int64_t), ColumnType::MAX_DECIMAL8_PRECISION);
   if (LIKELY(scale < 19)) return values[scale];
   return -1;  // Overflow
 }

 template <>
 inline int128_t DecimalUtil::GetScaleMultiplier<int128_t>(int scale) {
   DCHECK_GE(scale, 0);
   static const int128_t values[] = {
       static_cast<int128_t>(1ll),
       static_cast<int128_t>(10ll),
       static_cast<int128_t>(100ll),
       static_cast<int128_t>(1000ll),
       static_cast<int128_t>(10000ll),
       static_cast<int128_t>(100000ll),
       static_cast<int128_t>(1000000ll),
       static_cast<int128_t>(10000000ll),
       static_cast<int128_t>(100000000ll),
       static_cast<int128_t>(1000000000ll),
       static_cast<int128_t>(10000000000ll),
       static_cast<int128_t>(100000000000ll),
       static_cast<int128_t>(1000000000000ll),
       static_cast<int128_t>(10000000000000ll),
       static_cast<int128_t>(100000000000000ll),
       static_cast<int128_t>(1000000000000000ll),
       static_cast<int128_t>(10000000000000000ll),
       static_cast<int128_t>(100000000000000000ll),
       static_cast<int128_t>(1000000000000000000ll),
       static_cast<int128_t>(1000000000000000000ll) * 10ll,
       static_cast<int128_t>(1000000000000000000ll) * 100ll,
       static_cast<int128_t>(1000000000000000000ll) * 1000ll,
       static_cast<int128_t>(1000000000000000000ll) * 10000ll,
       static_cast<int128_t>(1000000000000000000ll) * 100000ll,
       static_cast<int128_t>(1000000000000000000ll) * 1000000ll,
       static_cast<int128_t>(1000000000000000000ll) * 10000000ll,
       static_cast<int128_t>(1000000000000000000ll) * 100000000ll,
       static_cast<int128_t>(1000000000000000000ll) * 1000000000ll,
       static_cast<int128_t>(1000000000000000000ll) * 10000000000ll,
       static_cast<int128_t>(1000000000000000000ll) * 100000000000ll,
       static_cast<int128_t>(1000000000000000000ll) * 1000000000000ll,
       static_cast<int128_t>(1000000000000000000ll) * 10000000000000ll,
       static_cast<int128_t>(1000000000000000000ll) * 100000000000000ll,
       static_cast<int128_t>(1000000000000000000ll) * 1000000000000000ll,
       static_cast<int128_t>(1000000000000000000ll) * 10000000000000000ll,
       static_cast<int128_t>(1000000000000000000ll) * 100000000000000000ll,
       static_cast<int128_t>(1000000000000000000ll) * 100000000000000000ll * 10ll,
       static_cast<int128_t>(1000000000000000000ll) * 100000000000000000ll * 100ll,
       static_cast<int128_t>(1000000000000000000ll) * 100000000000000000ll * 1000ll};
   DCHECK_GE(sizeof(values) / sizeof(int128_t), ColumnType::MAX_PRECISION);
   if (LIKELY(scale < 39)) return values[scale];
   return -1;  // Overflow
 }
 }
	// Licensed to the Apache Software Foundation (ASF) under one
	// or more contributor license agreements. See the NOTICE file
	// distributed with this work for additional information
	// regarding copyright ownership. The ASF licenses this file
	// to you under the Apache License, Version 2.0 (the
	// "License"); you may not use this file except in compliance
	// with the License. You may obtain a copy of the License at
	//
	// http://www.apache.org/licenses/LICENSE-2.0
	//
	// Unless required by applicable law or agreed to in writing,
	// software distributed under the License is distributed on an
	// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
	// KIND, either express or implied. See the License for the
	// specific language governing permissions and limitations
	// under the License.

	#pragma once

	#include <endian.h>

	#include <cstdint>
	#include <functional>

	#include "common/compiler-util.h"
	#include "common/logging.h"
	#include "runtime/multi-precision.h"
	#include "runtime/types.h"
	#include "util/arithmetic-util.h"
	#include "util/bit-util.h"
	#include "util/decimal-constants.h"

	namespace impala {

	class DecimalUtil {
	public:
	// Helper function that checks for multiplication overflow. We only check for overflow
	// if may_overflow is false.
	template <typename T>
	static T SafeMultiply(T a, T b, bool may_overflow) {
	T result = ArithmeticUtil::AsUnsigned<std::multiplies>(a, b);
	DCHECK(may_overflow \|\| a == 0 \|\| result / a == b);
	return result;
	}

	static int256_t SafeMultiply(int256_t a, int256_t b, bool may_overflow) {
	int256_t result = a * b;
	DCHECK(may_overflow \|\| a == 0 \|\| result / a == b);
	return result;
	}

	template<typename T>
	static T MultiplyByScale(const T& v, int scale, bool may_overflow) {
	T multiplier = GetScaleMultiplier<T>(scale);
	DCHECK(multiplier > 0);
	return SafeMultiply(v, multiplier, may_overflow);
	}

	template<typename T>
	static T GetScaleMultiplier(int scale) {
	DCHECK_GE(scale, 0);
	T result = 1;
	for (int i = 0; i < scale; ++i) {
	// Verify that the result of multiplication does not overflow.
	// TODO: This is not an ideal way to check for overflow because if T is signed, the
	// behavior is undefined in case of overflow. Replace this with a better overflow
	// check.
	DCHECK_GE(result * 10, result);
	result *= 10;
	}
	return result;
	}

	/// Helper function to scale down values by 10^delta_scale, truncating if
	/// round is false or rounding otherwise.
	template<typename T>
	static inline T ScaleDownAndRound(T value, int delta_scale, bool round) {
	DCHECK_GT(delta_scale, 0);
	T divisor = DecimalUtil::GetScaleMultiplier<T>(delta_scale);
	if (divisor > 0) {
	DCHECK(divisor % 2 == 0);
	T result = value / divisor;
	if (round) {
	T remainder = value % divisor;
	// In general, shifting down the multiplier is not safe, but we know
	// here that it is a multiple of two.
	if (abs(remainder) >= (divisor >> 1)) {
	// Bias at zero must be corrected by sign of dividend.
	result += Sign(value);
	}
	}
	return result;
	} else {
	DCHECK(divisor == -1);
	return 0;
	}
	}

	/// Write decimals as big endian (byte comparable) in fixed_len_size bytes.
	template<typename T>
	static inline void EncodeToFixedLenByteArray(
	uint8_t* buffer, int fixed_len_size, const T& v) {
	DCHECK_GT(fixed_len_size, 0);
	DCHECK_LE(fixed_len_size, sizeof(T));

	#if __BYTE_ORDER == __LITTLE_ENDIAN
	BitUtil::ByteSwap(buffer, &v, fixed_len_size);
	#else
	memcpy(buffer, &v + sizeof(T) - fixed_len_size, fixed_len_size);
	#endif

	#ifndef NDEBUG
	#if __BYTE_ORDER == __LITTLE_ENDIAN
	const int8_t* skipped_bytes_start = reinterpret_cast<const int8_t*>(&v) +
	fixed_len_size;
	#else
	const int8_t* skipped_bytes_start = reinterpret_cast<const int8_t*>(&v);
	#endif
	// On debug, verify that the skipped bytes are what we expect.
	for (int i = 0; i < sizeof(T) - fixed_len_size; ++i) {
	DCHECK_EQ(skipped_bytes_start[i], v.value() < 0 ? -1 : 0);
	}
	#endif
	}

	template <typename T>
	static inline void DecodeFromFixedLenByteArray(
	const uint8_t* RESTRICT buffer, int fixed_len_size, T* RESTRICT v) {
	DCHECK_GT(fixed_len_size, 0);
	#if __BYTE_ORDER != __LITTLE_ENDIAN
	static_assert(false, "Byte order must be little-endian");
	#endif
	if (LIKELY(sizeof(T) >= fixed_len_size)) {
	// We need to sign extend val. For example, if the original value was
	// -1, the original bytes were -1,-1,-1,-1. If we only wrote out 1 byte, ByteSwap()
	// only fills in the first byte of 'v' - the least significant byte. We initialize
	// the value to either 0 or -1 to ensure that the result is correctly sign-extended.

	// GCC can optimize this code to an instruction pair of movsbq and sarq with no
	// branches.
	int8_t most_significant_byte = reinterpret_cast<const int8_t*>(buffer)[0];
	v->set_value(most_significant_byte >= 0 ? 0 : -1);
	BitUtil::ByteSwap(reinterpret_cast<int8_t*>(v), buffer, fixed_len_size);
	} else {
	// If the destination 'v' is smaller than the input, discard the upper bytes.
	// These bytes should only contain the sign-extended value if they were encoded
	// correctly, so are not required.
	int bytes_to_discard = fixed_len_size - sizeof(T);
	BitUtil::ByteSwap(reinterpret_cast<int8_t*>(v),
	buffer + bytes_to_discard, sizeof(T));
	}
	}

	// Used to skip checking overflow in multiply of decimal values.
	static inline int Clz(const int128_t& v) {
	// GCC leaves __builtin_clz undefined for zero.
	uint64_t high = static_cast<uint64_t>(v >> 64);
	if (high != 0) return __builtin_clzll(high);
	uint64_t low = static_cast<uint64_t>(v);
	if (low != 0) return 64 + __builtin_clzll(low);
	return 128;
	}

	};

	template <>
	inline int32_t DecimalUtil::GetScaleMultiplier<int32_t>(int scale) {
	DCHECK_GE(scale, 0);
	static const int32_t values[] = {
	1,
	10,
	100,
	1000,
	10000,
	100000,
	1000000,
	10000000,
	100000000,
	1000000000};
	DCHECK_GE(sizeof(values) / sizeof(int32_t), ColumnType::MAX_DECIMAL4_PRECISION);
	if (LIKELY(scale < 10)) return values[scale];
	return -1; // Overflow
	}

	template <>
	inline int64_t DecimalUtil::GetScaleMultiplier<int64_t>(int scale) {
	DCHECK_GE(scale, 0);
	static const int64_t values[] = {
	1ll,
	10ll,
	100ll,
	1000ll,
	10000ll,
	100000ll,
	1000000ll,
	10000000ll,
	100000000ll,
	1000000000ll,
	10000000000ll,
	100000000000ll,
	1000000000000ll,
	10000000000000ll,
	100000000000000ll,
	1000000000000000ll,
	10000000000000000ll,
	100000000000000000ll,
	1000000000000000000ll};
	DCHECK_GE(sizeof(values) / sizeof(int64_t), ColumnType::MAX_DECIMAL8_PRECISION);
	if (LIKELY(scale < 19)) return values[scale];
	return -1; // Overflow
	}

	template <>
	inline int128_t DecimalUtil::GetScaleMultiplier<int128_t>(int scale) {
	DCHECK_GE(scale, 0);
	static const int128_t values[] = {
	static_cast<int128_t>(1ll),
	static_cast<int128_t>(10ll),
	static_cast<int128_t>(100ll),
	static_cast<int128_t>(1000ll),
	static_cast<int128_t>(10000ll),
	static_cast<int128_t>(100000ll),
	static_cast<int128_t>(1000000ll),
	static_cast<int128_t>(10000000ll),
	static_cast<int128_t>(100000000ll),
	static_cast<int128_t>(1000000000ll),
	static_cast<int128_t>(10000000000ll),
	static_cast<int128_t>(100000000000ll),
	static_cast<int128_t>(1000000000000ll),
	static_cast<int128_t>(10000000000000ll),
	static_cast<int128_t>(100000000000000ll),
	static_cast<int128_t>(1000000000000000ll),
	static_cast<int128_t>(10000000000000000ll),
	static_cast<int128_t>(100000000000000000ll),
	static_cast<int128_t>(1000000000000000000ll),
	static_cast<int128_t>(1000000000000000000ll) * 10ll,
	static_cast<int128_t>(1000000000000000000ll) * 100ll,
	static_cast<int128_t>(1000000000000000000ll) * 1000ll,
	static_cast<int128_t>(1000000000000000000ll) * 10000ll,
	static_cast<int128_t>(1000000000000000000ll) * 100000ll,
	static_cast<int128_t>(1000000000000000000ll) * 1000000ll,
	static_cast<int128_t>(1000000000000000000ll) * 10000000ll,
	static_cast<int128_t>(1000000000000000000ll) * 100000000ll,
	static_cast<int128_t>(1000000000000000000ll) * 1000000000ll,
	static_cast<int128_t>(1000000000000000000ll) * 10000000000ll,
	static_cast<int128_t>(1000000000000000000ll) * 100000000000ll,
	static_cast<int128_t>(1000000000000000000ll) * 1000000000000ll,
	static_cast<int128_t>(1000000000000000000ll) * 10000000000000ll,
	static_cast<int128_t>(1000000000000000000ll) * 100000000000000ll,
	static_cast<int128_t>(1000000000000000000ll) * 1000000000000000ll,
	static_cast<int128_t>(1000000000000000000ll) * 10000000000000000ll,
	static_cast<int128_t>(1000000000000000000ll) * 100000000000000000ll,
	static_cast<int128_t>(1000000000000000000ll) * 100000000000000000ll * 10ll,
	static_cast<int128_t>(1000000000000000000ll) * 100000000000000000ll * 100ll,
	static_cast<int128_t>(1000000000000000000ll) * 100000000000000000ll * 1000ll};
	DCHECK_GE(sizeof(values) / sizeof(int128_t), ColumnType::MAX_PRECISION);
	if (LIKELY(scale < 39)) return values[scale];
	return -1; // Overflow
	}
	}