// be/src/util/string_parser.hpp (Apache Doris)

 // Licensed to the Apache Software Foundation (ASF) under one
 // or more contributor license agreements.  See the NOTICE file
 // distributed with this work for additional information
 // regarding copyright ownership.  The ASF licenses this file
 // to you under the Apache License, Version 2.0 (the
 // "License"); you may not use this file except in compliance
 // with the License.  You may obtain a copy of the License at
 //
 //   http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing,
 // software distributed under the License is distributed on an
 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 // KIND, either express or implied.  See the License for the
 // specific language governing permissions and limitations
 // under the License.
 // This file is copied from
 // https://github.com/apache/impala/blob/branch-2.9.0/be/src/util/string-parser.hpp
 // and modified by Doris

 #pragma once

 #include <fast_float/fast_float.h>
 #include <fast_float/parse_number.h>
 #include <glog/logging.h>

 #include <cstdlib>
 // IWYU pragma: no_include <bits/std_abs.h>
 #include <cmath> // IWYU pragma: keep
 #include <cstdint>
 #include <limits>
 #include <map>
 #include <string>
 #include <system_error>
 #include <type_traits>
 #include <utility>

 #include "common/compiler_util.h" // IWYU pragma: keep
 #include "common/status.h"
 #include "runtime/large_int_value.h"
 #include "runtime/primitive_type.h"
 #include "vec/common/int_exp.h"
 #include "vec/common/string_utils/string_utils.h"
 #include "vec/core/extended_types.h"
 #include "vec/core/wide_integer.h"
 #include "vec/data_types/data_type_decimal.h"
 #include "vec/data_types/number_traits.h"

 namespace doris {
 namespace vectorized {
 template <DecimalNativeTypeConcept T>
 struct Decimal;
 } // namespace vectorized

 // Utility functions for doing atoi/atof on non-null terminated strings.  On micro benchmarks,
 // this is significantly faster than libc (atoi/strtol and atof/strtod).
 //
 // Strings with leading and trailing whitespaces are accepted.
 // Branching is heavily optimized for the non-whitespace successful case.
 // All the StringTo* functions first parse the input string assuming it has no leading whitespace.
 // If that first attempt was unsuccessful, these functions retry the parsing after removing
 // whitespace. Therefore, strings with whitespace take a perf hit on branch mis-prediction.
 //
 // For overflows, we are following the mysql behavior, to cap values at the max/min value for that
 // data type.  This is different from hive, which returns NULL for overflow slots for int types
 // and inf/-inf for float types.
 //
 // Things we tried that did not work:
 //  - lookup table for converting character to digit
 // Improvements (TODO):
 //  - Validate input using _sidd_compare_ranges
 //  - Since we know the length, we can parallelize this: i.e. result = 100*s[0] + 10*s[1] + s[2]
 class StringParser {
 public:
     // Outcome of a parse attempt, reported through the `result` out-parameter of every
     // string_to_* function.
     enum ParseResult { PARSE_SUCCESS = 0, PARSE_FAILURE, PARSE_OVERFLOW, PARSE_UNDERFLOW };

     // Returns the minimum (negative == true) or maximum (negative == false) value of T.
     // __int128 is special-cased to MIN_INT128 / MAX_INT128 instead of std::numeric_limits.
     template <typename T>
     static T numeric_limits(bool negative) {
         if constexpr (std::is_same_v<T, __int128>) {
             return negative ? MIN_INT128 : MAX_INT128;
         } else {
             return negative ? std::numeric_limits<T>::min() : std::numeric_limits<T>::max();
         }
     }

     // Returns the scale multiplier for `scale` by dispatching to the common::exp10_i* helper
     // matching the width of T (presumably 10^scale -- see vec/common/int_exp.h).
     template <typename T>
     static T get_scale_multiplier(int scale) {
         static_assert(std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t> ||
                               std::is_same_v<T, __int128> || std::is_same_v<T, wide::Int256>,
                       "You can only instantiate as int32_t, int64_t, __int128 or wide::Int256.");
         if constexpr (std::is_same_v<T, int32_t>) {
             return common::exp10_i32(scale);
         } else if constexpr (std::is_same_v<T, int64_t>) {
             return common::exp10_i64(scale);
         } else if constexpr (std::is_same_v<T, __int128>) {
             return common::exp10_i128(scale);
         } else if constexpr (std::is_same_v<T, wide::Int256>) {
             return common::exp10_i256(scale);
         }
     }

     // This is considerably faster than glibc's implementation (25x).
     // In the case of overflow, the max/min value for the data type will be returned.
     // Assumes s represents a decimal number.
     // The string is first parsed as-is; only if that attempt does not succeed is it parsed
     // again with leading whitespace skipped.
     template <typename T>
     static inline T string_to_int(const char* __restrict s, size_t len, ParseResult* result) {
         T ans = string_to_int_internal<T>(s, len, result);
         if (LIKELY(*result == PARSE_SUCCESS)) {
             return ans;
         }

         int i = skip_leading_whitespace(s, len);
         return string_to_int_internal<T>(s + i, len - i, result);
     }

     // This is considerably faster than glibc's implementation.
     // In the case of overflow, the max value for the data type will be returned.
     // Assumes s represents a decimal number without a sign.
     // Uses the same "parse, then retry without leading whitespace" strategy as string_to_int.
     template <typename T>
     static inline T string_to_unsigned_int(const char* __restrict s, int len, ParseResult* result) {
         T ans = string_to_unsigned_int_internal<T>(s, len, result);
         if (LIKELY(*result == PARSE_SUCCESS)) {
             return ans;
         }

         int i = skip_leading_whitespace(s, len);
         return string_to_unsigned_int_internal<T>(s + i, len - i, result);
     }

     // Convert a string s representing a number in given base into a decimal number.
     // Uses the same "parse, then retry without leading whitespace" strategy as string_to_int.
     template <typename T>
     static inline T string_to_int(const char* __restrict s, int64_t len, int base,
                                   ParseResult* result) {
         T ans = string_to_int_internal<T>(s, len, base, result);
         if (LIKELY(*result == PARSE_SUCCESS)) {
             return ans;
         }

         int i = skip_leading_whitespace(s, len);
         return string_to_int_internal<T>(s + i, len - i, base, result);
     }

     // Parses s as a floating point number of type T. Whitespace handling is done entirely by
     // string_to_float_internal, so no retry is needed here.
     template <typename T>
     static inline T string_to_float(const char* __restrict s, size_t len, ParseResult* result) {
         return string_to_float_internal<T>(s, len, result);
     }

     // Parses a string for 'true' or 'false', case insensitive.
     static inline bool string_to_bool(const char* __restrict s, int len, ParseResult* result) {
         bool ans = string_to_bool_internal(s, len, result);
         if (LIKELY(*result == PARSE_SUCCESS)) {
             return ans;
         }

         int i = skip_leading_whitespace(s, len);
         return string_to_bool_internal(s + i, len - i, result);
     }

     // Parses s as a decimal with the given precision and scale and returns the native
     // (integer) representation for primitive type P. Defined below the class.
     template <PrimitiveType P>
     static inline typename PrimitiveTypeTraits<P>::CppType::NativeType string_to_decimal(
             const char* __restrict s, int len, int type_precision, int type_scale,
             ParseResult* result);

     // Splits `base` into key/value pairs and inserts them into `result`.
     // A key runs from the current position up to the next key_value_separator; its value starts
     // at the first character that is not a key_value_separator character and runs up to the
     // next element_separator (or the end of the string). Parsing stops when no further
     // key_value_separator is found or when a key has no value. Keys already present in
     // `result` are left untouched. Always returns Status::OK().
     template <typename T>
     static Status split_string_to_map(const std::string& base, const T element_separator,
                                       const T key_value_separator,
                                       std::map<std::string, std::string>* result) {
         // Positions are kept as size_type so that comparisons against npos are exact and
         // no narrowing to int takes place.
         using size_type = std::string::size_type;

         size_type key_pos = 0;
         size_type key_end = 0;
         while ((key_end = base.find(key_value_separator, key_pos)) != std::string::npos) {
             const size_type val_pos = base.find_first_not_of(key_value_separator, key_end);
             if (val_pos == std::string::npos) {
                 break;
             }
             size_type val_end = base.find(element_separator, val_pos);
             if (val_end == std::string::npos) {
                 val_end = base.size();
             }
             result->insert(std::make_pair(base.substr(key_pos, key_end - key_pos),
                                           base.substr(val_pos, val_end - val_pos)));
             // Continue after the element separator. When the value ran to the end of the
             // string this is one past size(), for which find() simply reports npos.
             key_pos = val_end + 1;
         }

         return Status::OK();
     }

 private:
     // This is considerably faster than glibc's implementation.
     // In the case of overflow, the max/min value for the data type will be returned.
     // Assumes s represents a decimal number.
     // Return PARSE_FAILURE on leading whitespace. Trailing whitespace is allowed.
     template <typename T>
     static inline T string_to_int_internal(const char* __restrict s, int len, ParseResult* result);

     // This is considerably faster than glibc's implementation.
     // In the case of overflow, the max/min value for the data type will be returned.
     // Assumes s represents a decimal number.
     // Return PARSE_FAILURE on leading whitespace. Trailing whitespace is allowed.
     template <typename T>
     static inline T string_to_unsigned_int_internal(const char* __restrict s, int len,
                                                     ParseResult* result);

     // Convert a string s representing a number in given base into a decimal number.
     // Return PARSE_FAILURE on leading whitespace. Trailing whitespace is allowed.
     template <typename T>
     static inline T string_to_int_internal(const char* __restrict s, int64_t len, int base,
                                            ParseResult* result);

     // Converts an ascii string to an integer of type T assuming it cannot overflow
     // and the number is positive.
     // Leading whitespace is not allowed. Trailing whitespace will be skipped.
     template <typename T>
     static inline T string_to_int_no_overflow(const char* __restrict s, int len,
                                               ParseResult* result);

     // Parses a floating point number with fast_float::from_chars.
     // Leading and trailing whitespace and a single leading '+' are skipped before the
     // remaining characters are handed to fast_float; the whole remainder must be consumed
     // for the parse to succeed.
     // A result of +/-infinity is reported as PARSE_OVERFLOW unless the input itself spells
     // out an infinity (contains 'i' or 'I').
     template <typename T>
     static inline T string_to_float_internal(const char* __restrict s, int len,
                                              ParseResult* result);

     // parses a string for 'true' or 'false', case insensitive
     // Return PARSE_FAILURE on leading whitespace. Trailing whitespace is allowed.
     static inline bool string_to_bool_internal(const char* __restrict s, int len,
                                                ParseResult* result);

     // Returns true if s only contains whitespace (vacuously true when len <= 0).
     static inline bool is_all_whitespace(const char* __restrict s, int len) {
         for (int i = 0; i < len; ++i) {
             if (!LIKELY(is_whitespace(s[i]))) {
                 return false;
             }
         }
         return true;
     }

     // For strings like "3.0", "3.123", and "3.", can parse them as 3.
     // Returns true if s is a '.' followed only by digits. Reads s[0] unconditionally, so
     // callers must pass len >= 1.
     static inline bool is_float_suffix(const char* __restrict s, int len) {
         return (s[0] == '.' && is_all_digit(s + 1, len - 1));
     }

     // Returns true if every character of s is an ASCII digit (vacuously true when len <= 0).
     static inline bool is_all_digit(const char* __restrict s, int len) {
         for (int i = 0; i < len; ++i) {
             if (!LIKELY(s[i] >= '0' && s[i] <= '9')) {
                 return false;
             }
         }
         return true;
     }

     // Returns the position of the first non-whitespace character in s, or len if there is none.
     static inline int skip_leading_whitespace(const char* __restrict s, int len) {
         int i = 0;
         while (i < len && is_whitespace(s[i])) {
             ++i;
         }
         return i;
     }

     // Our own definition of "isspace" that optimize on the ' ' branch.
     static inline bool is_whitespace(const char& c) {
         return LIKELY(c == ' ') ||
                UNLIKELY(c == '\t' || c == '\n' || c == '\v' || c == '\f' || c == '\r');
     }

 }; // end of class StringParser

 // Parses s[0, len) as a signed base-10 integer of type T.
 // Accepts an optional leading '+' or '-', then digits, optionally followed by either only
 // whitespace or a ".digits" suffix (which is ignored). Leading whitespace is a failure.
 // On overflow *result is PARSE_OVERFLOW and the max/min value of T is returned.
 template <typename T>
 T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) {
     if (UNLIKELY(len <= 0)) {
         *result = PARSE_FAILURE;
         return 0;
     }

     // The magnitude is accumulated unsigned so that |min()| is representable.
     using UnsignedT = typename std::make_unsigned<T>::type;
     UnsignedT magnitude = 0;
     UnsignedT limit = StringParser::numeric_limits<T>(false);
     bool is_negative = false;
     int pos = 0;

     const char lead = *s;
     if (lead == '-' || lead == '+') {
         if (lead == '-') {
             is_negative = true;
             // A negative number may be one larger in magnitude than the positive maximum.
             limit += 1;
         }
         ++pos;
         // A lone sign character is not a number.
         if (UNLIKELY(len == 1)) {
             *result = PARSE_FAILURE;
             return 0;
         }
     }

     // Fast path: too few characters for the value to overflow T.
     if (LIKELY(len - pos < vectorized::NumberTraits::max_ascii_len<T>())) {
         magnitude = string_to_int_no_overflow<UnsignedT>(s + pos, len - pos, result);
         return static_cast<T>(is_negative ? -magnitude : magnitude);
     }

     const T limit_div_10 = limit / 10;
     const T limit_mod_10 = limit % 10;

     const int digits_begin = pos;
     while (pos < len) {
         const char c = s[pos];
         if (UNLIKELY(c < '0' || c > '9')) {
             // A non-digit is only acceptable after at least one digit, and only when the
             // rest of the input is all whitespace or a ".digits" suffix.
             const bool rejected = pos == digits_begin ||
                                   (!is_all_whitespace(s + pos, len - pos) &&
                                    !is_float_suffix(s + pos, len - pos));
             if (UNLIKELY(rejected)) {
                 *result = PARSE_FAILURE;
                 return 0;
             }
             break;
         }

         T digit = c - '0';
         // Overflow check done before the multiply: appending `digit` overflows when the
         // magnitude already exceeds limit / 10, or equals it with digit > limit % 10.
         if (UNLIKELY(magnitude > (limit_div_10 - (digit > limit_mod_10)))) {
             *result = PARSE_OVERFLOW;
             return is_negative ? -limit : limit;
         }
         magnitude = magnitude * 10 + digit;
         ++pos;
     }

     *result = PARSE_SUCCESS;
     return static_cast<T>(is_negative ? -magnitude : magnitude);
 }

 // Parses s[0, len) as an unsigned base-10 integer of type T.
 // No sign character is accepted. Digits may be followed by whitespace only; leading
 // whitespace is a failure. On overflow *result is PARSE_OVERFLOW and the max value of T
 // is returned.
 template <typename T>
 T StringParser::string_to_unsigned_int_internal(const char* __restrict s, int len,
                                                 ParseResult* result) {
     if (UNLIKELY(len <= 0)) {
         *result = PARSE_FAILURE;
         return 0;
     }

     using SignedT = typename std::make_signed<T>::type;
     // Fast path: the input is shorter than max_ascii_len of the signed counterpart, so the
     // value cannot overflow T.
     if (LIKELY(len < vectorized::NumberTraits::max_ascii_len<SignedT>())) {
         return string_to_int_no_overflow<T>(s, len, result);
     }

     const T limit = std::numeric_limits<T>::max();
     const T limit_div_10 = limit / 10;
     const T limit_mod_10 = limit % 10;

     T acc = 0;
     for (int pos = 0; pos < len; ++pos) {
         const char c = s[pos];
         if (UNLIKELY(c < '0' || c > '9')) {
             // Only trailing whitespace may follow the digits, and there must be a digit.
             if (UNLIKELY(pos == 0 || !is_all_whitespace(s + pos, len - pos))) {
                 *result = PARSE_FAILURE;
                 return 0;
             }
             break;
         }

         T digit = c - '0';
         // Overflow check done before the multiply (see the signed variant for details).
         if (UNLIKELY(acc > (limit_div_10 - (digit > limit_mod_10)))) {
             *result = PARSE_OVERFLOW;
             return limit;
         }
         acc = acc * 10 + digit;
     }

     *result = PARSE_SUCCESS;
     return acc;
 }

 // Parses s[0, len) as a signed integer written in the given base.
 // Digits are '0'-'9' followed by letters (case-insensitive, 'a' == 10, ...). An optional
 // leading '+' or '-' is accepted. Parsing stops successfully at the first letter/digit that
 // is not valid in `base`, or at trailing whitespace; any other character, or a non-digit in
 // the first position, is a PARSE_FAILURE. On overflow *result is PARSE_OVERFLOW and the
 // max/min value of T is returned.
 template <typename T>
 T StringParser::string_to_int_internal(const char* __restrict s, int64_t len, int base,
                                        ParseResult* result) {
     using UnsignedT = typename std::make_unsigned<T>::type;
     UnsignedT val = 0;
     UnsignedT max_val = StringParser::numeric_limits<T>(false);
     bool negative = false;
     if (UNLIKELY(len <= 0)) {
         *result = PARSE_FAILURE;
         return 0;
     }
     int i = 0;
     switch (*s) {
     case '-':
         negative = true;
         // |min()| is max() + 1. The increment must happen on the unsigned value: doing
         // `numeric_limits<T>(false) + 1` in T would be signed overflow for int/int64_t.
         max_val += 1;
         [[fallthrough]];
     case '+':
         i = 1;
     }

     const T max_div_base = max_val / base;
     const T max_mod_base = max_val % base;

     int first = i;
     for (; i < len; ++i) {
         T digit;
         if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
             digit = s[i] - '0';
         } else if (s[i] >= 'a' && s[i] <= 'z') {
             digit = (s[i] - 'a' + 10);
         } else if (s[i] >= 'A' && s[i] <= 'Z') {
             digit = (s[i] - 'A' + 10);
         } else {
             if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) {
                 // Reject the string because either the first char was not an alpha/digit,
                 // or the remaining chars are not all whitespace
                 *result = PARSE_FAILURE;
                 return 0;
             }
             // skip trailing whitespace.
             break;
         }

         // Bail, if we encounter a digit that is not available in base.
         if (digit >= base) {
             break;
         }

         // Overflow check done before the multiply: appending `digit` overflows when val
         // already exceeds max / base, or equals it with digit > max % base.
         if (UNLIKELY(val > (max_div_base - (digit > max_mod_base)))) {
             *result = PARSE_OVERFLOW;
             return static_cast<T>(negative ? -max_val : max_val);
         }
         val = val * base + digit;
     }
     *result = PARSE_SUCCESS;
     return static_cast<T>(negative ? -val : val);
 }

 // Parses s[0, len) as a non-negative base-10 integer, assuming the caller has already
 // established that the value cannot overflow T (no overflow check is performed).
 // The first character must be a digit. Digits may be followed by either only whitespace or
 // a ".digits" suffix, both of which are ignored. An empty input yields 0 with PARSE_SUCCESS.
 template <typename T>
 T StringParser::string_to_int_no_overflow(const char* __restrict s, int len, ParseResult* result) {
     T acc = 0;
     if (UNLIKELY(len == 0)) {
         *result = PARSE_SUCCESS;
         return acc;
     }

     // The first character is validated separately so that the main loop stays tight.
     const char first = s[0];
     if (UNLIKELY(first < '0' || first > '9')) {
         *result = PARSE_FAILURE;
         return 0;
     }
     acc = first - '0';

     // Consume the run of digits.
     int pos = 1;
     while (pos < len && s[pos] >= '0' && s[pos] <= '9') {
         T digit = s[pos] - '0';
         acc = acc * 10 + digit;
         ++pos;
     }

     // Anything left over must be pure whitespace or a ".digits" suffix.
     if (pos < len) {
         const bool tail_ok = is_all_whitespace(s + pos, len - pos) ||
                              is_float_suffix(s + pos, len - pos);
         if (UNLIKELY(!tail_ok)) {
             *result = PARSE_FAILURE;
             return 0;
         }
     }

     *result = PARSE_SUCCESS;
     return acc;
 }

 // Parses s[0, len) as a floating point number and returns it converted to T.
 // Leading and trailing whitespace and a single leading '+' are skipped; the remaining
 // characters are parsed with fast_float::from_chars and must be consumed completely.
 // *result is set to:
 //   PARSE_FAILURE  - nothing left to parse, malformed input, or a second sign after '+';
 //   PARSE_OVERFLOW - the parsed value is infinite although the text does not spell "inf";
 //   PARSE_SUCCESS  - otherwise.
 template <typename T>
 T StringParser::string_to_float_internal(const char* __restrict s, int len, ParseResult* result) {
     int i = 0;
     // skip leading spaces
     for (; i < len; ++i) {
         if (!is_whitespace(s[i])) {
             break;
         }
     }

     // skip back spaces
     int j = len - 1;
     for (; j >= i; j--) {
         if (!is_whitespace(s[j])) {
             break;
         }
     }

     // skip leading '+', from_chars can handle '-'
     const bool skipped_plus = i < len && s[i] == '+';
     if (skipped_plus) {
         i++;
     }
     if (UNLIKELY(i > j)) {
         *result = PARSE_FAILURE;
         return 0;
     }
     // Having consumed a '+', a further sign (e.g. "+-1") must not be accepted; without this
     // check from_chars would happily parse the '-' and return a negative value.
     if (UNLIKELY(skipped_plus && (s[i] == '-' || s[i] == '+'))) {
         *result = PARSE_FAILURE;
         return 0;
     }

     // Use double here to not lose precision while accumulating the result
     double val = 0;
     auto res = fast_float::from_chars(s + i, s + j + 1, val);

     if (res.ec == std::errc() && res.ptr == s + j + 1) {
         // std::isinf is used instead of an unqualified abs(): depending on which headers
         // are visible, abs(double) can bind to the C `int abs(int)` overload.
         if (std::isinf(val)) {
             // An infinite result is only legitimate when the text itself says so
             // ("inf" / "infinity" in any case); otherwise the number overflowed.
             auto contain_inf = false;
             for (int k = i; k < j + 1; k++) {
                 if (s[k] == 'i' || s[k] == 'I') {
                     contain_inf = true;
                     break;
                 }
             }

             *result = contain_inf ? PARSE_SUCCESS : PARSE_OVERFLOW;
         } else {
             *result = PARSE_SUCCESS;
         }
         return val;
     } else {
         *result = PARSE_FAILURE;
     }
     return 0;
 }

 // Parses s[0, len) as the literal "true" or "false" (ASCII case-insensitive), optionally
 // followed by whitespace. Leading whitespace or any other content yields PARSE_FAILURE
 // with a return value of false.
 inline bool StringParser::string_to_bool_internal(const char* __restrict s, int len,
                                                   ParseResult* result) {
     // Compares the first word_len characters of s against a lower-case word, accepting
     // either case for each letter.
     const auto starts_with_ignore_case = [s](const char* lower_word, int word_len) {
         for (int k = 0; k < word_len; ++k) {
             const char lower = lower_word[k];
             const char upper = static_cast<char>(lower - ('a' - 'A'));
             if (s[k] != lower && s[k] != upper) {
                 return false;
             }
         }
         return true;
     };

     if (len >= 4 && starts_with_ignore_case("true", 4) &&
         LIKELY(is_all_whitespace(s + 4, len - 4))) {
         *result = PARSE_SUCCESS;
         return true;
     }

     if (len >= 5 && starts_with_ignore_case("false", 5) &&
         LIKELY(is_all_whitespace(s + 5, len - 5))) {
         *result = PARSE_SUCCESS;
         return false;
     }

     *result = PARSE_FAILURE;
     return false;
 }

 // Parses s[0, len) as a decimal number and returns its native integer representation,
 // adjusted to `type_scale` fractional digits.
 //
 //   s, len          input characters (not null terminated); surrounding whitespace is trimmed.
 //   type_precision  precision of the target decimal type.
 //   type_scale      scale of the target decimal type.
 //   result          out: PARSE_SUCCESS, PARSE_FAILURE, PARSE_OVERFLOW or PARSE_UNDERFLOW.
 //
 // Accepted syntax: optional sign, digits, optional '.', digits, optional 'e'/'E' exponent.
 // PARSE_UNDERFLOW is reported when the input carries more fractional digits than
 // `type_scale`; the value is then rounded to `type_scale` digits rather than discarded.
 template <PrimitiveType P>
 typename PrimitiveTypeTraits<P>::CppType::NativeType StringParser::string_to_decimal(
         const char* __restrict s, int len, int type_precision, int type_scale,
         ParseResult* result) {
     using T = typename PrimitiveTypeTraits<P>::CppType::NativeType;
     static_assert(std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t> ||
                           std::is_same_v<T, __int128> || std::is_same_v<T, wide::Int256>,
                   "Cast string to decimal only support target type int32_t, int64_t, __int128 or "
                   "wide::Int256.");
     // Special cases:
     //   1) '' == Fail, an empty string fails to parse.
     //   2) '   #   ' == #, leading and trailing white space is ignored.
     //   3) '.' == 0, a single dot parses as zero (for consistency with other types).
     //   4) '#.' == '#', a trailing dot is ignored.

     // Ignore leading and trailing spaces.
     while (len > 0 && is_whitespace(*s)) {
         ++s;
         --len;
     }
     while (len > 0 && is_whitespace(s[len - 1])) {
         --len;
     }

     // Consume an optional sign; the magnitude is parsed as a non-negative value and negated
     // on return.
     bool is_negative = false;
     if (len > 0) {
         switch (*s) {
         case '-':
             is_negative = true;
             [[fallthrough]];
         case '+':
             ++s;
             --len;
         }
     }

     // Ignore leading zeros.
     bool found_value = false;
     while (len > 0 && UNLIKELY(*s == '0')) {
         found_value = true;
         ++s;
         --len;
     }

     // Ignore leading zeros even after a dot. This allows for differentiating between
     // cases like 0.01e2, which would fit in a DECIMAL(1, 0), and 0.10e2, which would
     // overflow.
     // `scale` counts the fractional digits seen so far (including these skipped zeros);
     // `found_dot` is an int so it can be added to `scale` directly for each digit.
     int scale = 0;
     int found_dot = 0;
     if (len > 0 && *s == '.') {
         found_dot = 1;
         ++s;
         --len;
         while (len > 0 && UNLIKELY(*s == '0')) {
             found_value = true;
             ++scale;
             ++s;
             --len;
         }
     }

     // precision  - number of digits accumulated into `value`.
     // max_digit  - number of integer digits the target type can hold.
     // cur_digit  - number of integer digits seen so far.
     // has_round  - set once the first excess fractional digit has been used for rounding;
     //              all later digits are then ignored.
     int precision = 0;
     int max_digit = type_precision - type_scale;
     int cur_digit = 0;
     bool found_exponent = false;
     int8_t exponent = 0;
     T value = 0;
     bool has_round = false;
     for (int i = 0; i < len; ++i) {
         const char& c = s[i];
         if (LIKELY('0' <= c && c <= '9')) {
             found_value = true;
             // Ignore digits once the type's precision limit is reached. This avoids
             // overflowing the underlying storage while handling a string like
             // 10000000000e-10 into a DECIMAL(1, 0). Adjustments for ignored digits and
             // an exponent will be made later.
             if (LIKELY(type_precision > precision) && !has_round) {
                 value = (value * 10) + (c - '0'); // Benchmarks are faster with parenthesis...
                 ++precision;
                 scale += found_dot;
                 cur_digit = precision - scale;
             } else if (!found_dot && max_digit < (precision - scale)) {
                 // Still in the integer part and already more integer digits than the type
                 // allows: saturate to the type's min/max value.
                 *result = StringParser::PARSE_OVERFLOW;
                 value = is_negative ? vectorized::min_decimal_value<P>(type_precision)
                                     : vectorized::max_decimal_value<P>(type_precision);
                 return value;
             } else if (found_dot && scale >= type_scale && !has_round) {
                 // make rounding cases
                 // First fractional digit beyond what is kept: round half up on it.
                 if (c > '4') {
                     value += 1;
                 }
                 has_round = true;
                 continue;
             } else if (!found_dot) {
                 // Integer digit that was not accumulated; only its count is tracked.
                 ++cur_digit;
             }
             DCHECK(value >= 0); // For some reason //DCHECK_GE doesn't work with __int128.
         } else if (c == '.' && LIKELY(!found_dot)) {
             found_dot = 1;
         } else if ((c == 'e' || c == 'E') && LIKELY(!found_exponent)) {
             found_exponent = true;
             // Everything after the exponent marker is parsed as an int8_t exponent.
             exponent = string_to_int_internal<int8_t>(s + i + 1, len - i - 1, result);
             if (UNLIKELY(*result != StringParser::PARSE_SUCCESS)) {
                 // An exponent that overflowed towards negative values is an underflow of
                 // the decimal as a whole.
                 if (*result == StringParser::PARSE_OVERFLOW && exponent < 0) {
                     *result = StringParser::PARSE_UNDERFLOW;
                 }
                 return 0;
             }
             break;
         } else {
             // Any other character (including a second '.' or a second exponent marker).
             // With no non-zero value parsed so far the whole input is rejected.
             if (value == 0) {
                 *result = StringParser::PARSE_FAILURE;
                 return 0;
             }
             // Otherwise the digits parsed up to this character are returned as a
             // successful result; the rest of the input is ignored.
             *result = StringParser::PARSE_SUCCESS;
             if (type_scale >= scale) {
                 value *= get_scale_multiplier<T>(type_scale - scale);
                 // here meet non-valid character, should return the value, keep going to meet
                 // the E/e character because we make right user-given type_precision
                 // not max number type_precision
                 if (!is_numeric_ascii(c)) {
                     if (cur_digit > type_precision) {
                         *result = StringParser::PARSE_OVERFLOW;
                         value = is_negative ? vectorized::min_decimal_value<P>(type_precision)
                                             : vectorized::max_decimal_value<P>(type_precision);
                         return value;
                     }
                     return is_negative ? T(-value) : T(value);
                 }
             }

             // NOTE(review): when type_scale < scale the value is returned here without
             // being rescaled to type_scale -- confirm this is intended.
             return is_negative ? T(-value) : T(value);
         }
     }

     // Find the number of truncated digits before adjusting the precision for an exponent.
     if (exponent > scale) {
         // Ex: 0.1e3 (which at this point would have precision == 1 and scale == 1), the
         //     scale must be set to 0 and the value set to 100 which means a precision of 3.
         precision += exponent - scale;

         value *= get_scale_multiplier<T>(exponent - scale);
         scale = 0;
     } else {
         // Ex: 100e-4, the scale must be set to 4 but no adjustment to the value is needed,
         //     the precision must also be set to 4 but that will be done below for the
         //     non-exponent case anyways.
         scale -= exponent;
     }
     // Ex: 0.001, at this point would have precision 1 and scale 3 since leading zeros
     //     were ignored during previous parsing.
     if (scale > precision) {
         precision = scale;
     }

     // Microbenchmarks show that beyond this point, returning on parse failure is slower
     // than just letting the function run out.
     *result = StringParser::PARSE_SUCCESS;
     if (UNLIKELY(precision - scale > type_precision - type_scale)) {
         // More integer digits than the target type can hold.
         *result = StringParser::PARSE_OVERFLOW;
         if constexpr (TYPE_DECIMALV2 != P) {
             // decimalv3 overflow will return max min value for type precision
             value = is_negative ? vectorized::min_decimal_value<P>(type_precision)
                                 : vectorized::max_decimal_value<P>(type_precision);
             return value;
         }
         // For TYPE_DECIMALV2 the value parsed so far is returned further down, with
         // *result left at PARSE_OVERFLOW.
     } else if (UNLIKELY(scale > type_scale)) {
         // More fractional digits than the target type keeps: drop the excess digits and
         // round half up on the dropped part.
         *result = StringParser::PARSE_UNDERFLOW;
         int shift = scale - type_scale;
         T divisor = get_scale_multiplier<T>(shift);
         // NOTE(review): presumably get_scale_multiplier saturates to max() when the shift
         // is too large for T, in which case nothing of the value survives -- confirm.
         if (UNLIKELY(divisor == std::numeric_limits<T>::max())) {
             value = 0;
         } else {
             T remainder = value % divisor;
             value /= divisor;
             if ((remainder > 0 ? T(remainder) : T(-remainder)) >= (divisor >> 1)) {
                 value += 1;
             }
         }
         DCHECK(value >= 0); // //DCHECK_GE doesn't work with __int128.
     } else if (UNLIKELY(!found_value && !found_dot)) {
         // Neither a digit nor a dot was seen (e.g. empty input or a lone sign).
         *result = StringParser::PARSE_FAILURE;
     }

     // Pad with zeros on the right so the value carries exactly type_scale fractional digits.
     if (type_scale > scale) {
         value *= get_scale_multiplier<T>(type_scale - scale);
     }

     return is_negative ? T(-value) : T(value);
 }

 } // end namespace doris