be/src/util/string_parser.hpp - doris - Git at Google

 // Licensed to the Apache Software Foundation (ASF) under one
 // or more contributor license agreements.  See the NOTICE file
 // distributed with this work for additional information
 // regarding copyright ownership.  The ASF licenses this file
 // to you under the Apache License, Version 2.0 (the
 // "License"); you may not use this file except in compliance
 // with the License.  You may obtain a copy of the License at
 //
 //   http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing,
 // software distributed under the License is distributed on an
 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 // KIND, either express or implied.  See the License for the
 // specific language governing permissions and limitations
 // under the License.
 // This file is copied from
 // https://github.com/apache/impala/blob/branch-2.9.0/be/src/util/string-parser.hpp
 // and modified by Doris

 #pragma once

 #include <fast_float/fast_float.h>
 #include <fast_float/parse_number.h>
 #include <glog/logging.h>
 #include <sys/types.h>

 #include <algorithm>
 #include <cstdlib>
 // IWYU pragma: no_include <bits/std_abs.h>
 #include <cmath> // IWYU pragma: keep
 #include <cstdint>
 #include <limits>
 #include <map>
 #include <string>
 #include <type_traits>
 #include <utility>

 #include "common/compiler_util.h" // IWYU pragma: keep
 #include "common/status.h"
 #include "runtime/large_int_value.h"
 #include "runtime/primitive_type.h"
 #include "vec/common/int_exp.h"
 #include "vec/common/string_utils/string_utils.h"
 #include "vec/core/extended_types.h"
 #include "vec/data_types/number_traits.h"

 namespace doris {
 #include "common/compile_check_avoid_begin.h"
 namespace vectorized {
 // Forward declaration of the Decimal class template; only the name is introduced here.
 template <DecimalNativeTypeConcept T>
 struct Decimal;
 } // namespace vectorized

 // SET_PARAMS_RET_FALSE_IFN(stmt, ...): "return false if not".
 // Evaluates `stmt`; when it is false the enclosing function returns `false`.
 // The macro relies on two names being visible at the expansion site:
 //  - `IsStrict`: a compile-time boolean. In strict mode the macro first stores
 //    Status::InvalidArgument(__VA_ARGS__) into `params.status`; otherwise no status is recorded.
 //  - `params`: an object with an assignable `status` member.
 #ifndef SET_PARAMS_RET_FALSE_IFN
 #define SET_PARAMS_RET_FALSE_IFN(stmt, ...)                           \
     do {                                                              \
         if (!(stmt)) [[unlikely]] {                                   \
             if constexpr (IsStrict) {                                 \
                 params.status = Status::InvalidArgument(__VA_ARGS__); \
             }                                                         \
             return false;                                             \
         }                                                             \
     } while (false)
 #endif

 // SET_PARAMS_RET_FALSE_FROM_EXCEPTION(stmt):
 // Executes `stmt` inside a try block. If it throws doris::Exception, the enclosing function
 // returns `false`; when the compile-time flag `IsStrict` is true, `params.status` is first set to
 // e.to_status(). Exceptions of any other type are not caught by this macro.
 // Requires `IsStrict` and `params` to be visible at the expansion site.
 #ifndef SET_PARAMS_RET_FALSE_FROM_EXCEPTION
 #define SET_PARAMS_RET_FALSE_FROM_EXCEPTION(stmt) \
     do {                                          \
         try {                                     \
             { stmt; }                             \
         } catch (const doris::Exception& e) {     \
             if constexpr (IsStrict) {             \
                 params.status = e.to_status();    \
             }                                     \
             return false;                         \
         }                                         \
     } while (false)
 #endif

 // Trims ASCII whitespace from both ends of the range [s, s + len).
 // Returns a pointer to the first character that is not whitespace and shrinks `len` in place so
 // that it covers only the trimmed content. For an all-whitespace input the returned pointer is
 // advanced past every character and `len` becomes 0.
 template <typename T>
 inline const char* skip_ascii_whitespaces(const char* s, T& len) {
     // Leading side: every dropped character moves the start forward and shortens the range.
     for (; len > 0 && is_whitespace_ascii(*s); --len) {
         ++s;
     }

     // Trailing side: only the length needs to shrink.
     for (; len > 0 && is_whitespace_ascii(s[len - 1]); --len) {
     }

     return s;
 }

 template <bool (*Pred)(char)>
 bool range_suite(const char* s, const char* end) {
     return std::ranges::all_of(s, end, Pred);
 }

 // Function-pointer shortcuts for range_suite: each returns true when every character of
 // [s, end) is accepted by is_numeric_ascii / is_whitespace_ascii respectively.
 inline auto is_digit_range = range_suite<is_numeric_ascii>;
 inline auto is_space_range = range_suite<is_whitespace_ascii>;

 // Returns true when `s + offset` is strictly before `end`, false once it reaches or passes `end`.
 // Combining in_bound with range_suite is fine and does not lead to duplicated calculation.
 inline bool in_bound(const char* s, const char* end, size_t offset) {
     return s + offset < end;
 }

 // LEN = 0 means any length(include zero). LEN = 1 means only one character. so on. LEN = -x means x or more.
 // if need result, use StringRef{origin_s, s} outside
 template <int LEN, bool (*Pred)(char)>
 bool skip_qualified_char(const char*& s, const char* end) {
     if constexpr (LEN == 0) {
         // Consume any length of characters that match the predicate.
         while (s != end && Pred(*s)) {
             ++s;
         }
     } else if constexpr (LEN > 0) {
         // Consume exactly LEN characters that match the predicate.
         for (int i = 0; i < LEN; ++i, ++s) {
             if (s == end || !Pred(*s)) [[unlikely]] {
                 return false;
             }
         }
     } else { // LEN < 0
         // Consume at least -LEN characters that match the predicate.
         int count = 0;
         while (s != end && Pred(*s)) {
             ++s;
             ++count;
         }
         if (count < -LEN) [[unlikely]] {
             return false;
         }
     }
     return true;
 }

 // Ready-made skip_qualified_char instantiations, stored as function pointers:
 //  - skip_any_whitespace / skip_any_digit: LEN == 0, consume zero or more matching characters.
 //  - skip_tz_name_part: LEN == -1, needs at least one character accepted by is_not_whitespace_ascii.
 //  - skip_one_slash / skip_one_non_alnum: LEN == 1, need exactly one matching character.
 inline auto skip_any_whitespace = skip_qualified_char<0, is_whitespace_ascii>;
 inline auto skip_any_digit = skip_qualified_char<0, is_numeric_ascii>;
 inline auto skip_tz_name_part = skip_qualified_char<-1, is_not_whitespace_ascii>;
 inline auto skip_one_slash = skip_qualified_char<1, is_slash_ascii>;
 inline auto skip_one_non_alnum = skip_qualified_char<1, is_non_alnum>;

 // Returns true when `c` is a space, an uppercase 'T', or a colon.
 inline bool is_delimiter(char c) {
     switch (c) {
     case ' ':
     case 'T':
     case ':':
         return true;
     default:
         return false;
     }
 }
 // Consumes exactly one character accepted by is_delimiter; fails on any other character or at `end`.
 inline auto consume_one_delimiter = skip_qualified_char<1, is_delimiter>;

 // Returns true when `c` is a dash or a forward slash.
 inline bool is_date_sep(char c) {
     switch (c) {
     case '-':
     case '/':
         return true;
     default:
         return false;
     }
 }
 // Consumes exactly one character accepted by is_date_sep; fails on any other character or at `end`.
 inline auto consume_one_date_sep = skip_qualified_char<1, is_date_sep>;

 // Returns true only for the colon character.
 inline bool is_colon(char c) {
     return ':' == c;
 }
 // Consumes exactly one ':' character; fails on any other character or at `end`.
 inline auto consume_one_colon = skip_qualified_char<1, is_colon>;

 // Reads a run of ASCII digits (no sign) starting at `s` into `out` and advances `s` past every
 // digit that was consumed. `out` is reset to 0 before parsing.
 //  - MAX_LEN > 0: greedy match of at most MAX_LEN digits; fails when the run stops before LEN
 //                 digits were read.
 //  - otherwise, LEN == 0: takes every digit available (possibly none) and always succeeds.
 //  - otherwise, LEN  > 0: takes exactly LEN digits or fails.
 // On failure `s` stays where scanning stopped and `out` holds the partially accumulated value.
 // No overflow detection is performed on `out`.
 template <typename T, int LEN = 0, int MAX_LEN = -1>
 bool consume_digit(const char*& s, const char* end, T& out) {
     static_assert(LEN >= 0);
     out = 0;
     if constexpr (MAX_LEN > 0) {
         int taken = 0;
         while (taken < MAX_LEN && s != end && is_numeric_ascii(*s)) {
             out = out * 10 + (*s - '0');
             ++s;
             ++taken;
         }
         // Reaching the cap always succeeds; stopping early needs at least LEN digits.
         return taken == MAX_LEN || taken >= LEN;
     } else if constexpr (LEN == 0) {
         for (; s != end && is_numeric_ascii(*s); ++s) {
             out = out * 10 + (*s - '0');
         }
         return true;
     } else {
         for (int remaining = LEN; remaining > 0; --remaining, ++s) {
             if (s == end || !is_numeric_ascii(*s)) [[unlikely]] {
                 return false;
             }
             out = out * 10 + (*s - '0');
         }
         return true;
     }
 }

 // Hand-written specialization for the "exactly two digits" case, which is used very often in
 // date/time parsing. On failure `out` is 0 and `s` is not moved.
 template <>
 inline bool consume_digit<uint32_t, 2, -1>(const char*& s, const char* end, uint32_t& out) {
     out = 0;
     const bool two_available = s != end && s + 1 != end;
     if (!two_available || !is_numeric_ascii(s[0]) || !is_numeric_ascii(s[1])) [[unlikely]] {
         return false;
     }
     const uint32_t tens = s[0] - '0';
     const uint32_t ones = s[1] - '0';
     out = tens * 10 + ones;
     s += 2; // both digits are consumed
     return true;
 }

 // Hand-written specialization for the "one or two digits" case, which is used very often in
 // date/time parsing. Requires a leading digit and greedily takes a second one when present.
 // On failure `out` is 0 and `s` is not moved.
 template <>
 inline bool consume_digit<uint32_t, 1, 2>(const char*& s, const char* end, uint32_t& out) {
     out = 0;
     if (s == end || !is_numeric_ascii(*s)) [[unlikely]] {
         return false;
     }
     // The first digit is mandatory.
     out = *s - '0';
     ++s;
     // The second digit is optional.
     if (s != end && is_numeric_ascii(*s)) {
         out = out * 10 + (*s - '0');
         ++s;
     }
     return true;
 }

 // Counts how many consecutive characters, starting at `s` and stopping before `end`, satisfy Pred.
 // Neither pointer is modified for the caller; `s <= end` is expected (checked in debug builds).
 template <bool (*Pred)(char)>
 uint32_t count_valid_length(const char* s, const char* end) {
     // Print the addresses rather than the pointed-to text: the range is not NUL-terminated and
     // `end` is one-past-the-end, so streaming them as C strings would read out of bounds.
     DCHECK(s <= end) << "s: " << static_cast<const void*>(s)
                      << ", end: " << static_cast<const void*>(end);
     uint32_t count = 0;
     while (s != end && Pred(*s)) {
         ++count;
         ++s;
     }
     return count;
 }

 // Counts the leading characters of [s, end) that are accepted by is_numeric_ascii.
 inline auto count_digits = count_valid_length<is_numeric_ascii>;

 // Builds a six-character offset string laid out as <sign>HH:MM from the given sign character
 // and the hour / minute parts. Each part is rendered as two characters via /10 and %10, so
 // values of 100 or more would not produce plain decimal digits.
 inline PURE std::string combine_tz_offset(char sign, uint32_t hour_offset, uint32_t minute_offset) {
     const auto as_digit = [](uint32_t v) { return static_cast<char>('0' + v); };
     std::string result {sign,
                         as_digit(hour_offset / 10),
                         as_digit(hour_offset % 10),
                         ':',
                         as_digit(minute_offset / 10),
                         as_digit(minute_offset % 10)};
     DCHECK_EQ(result.size(), 6);
     return result;
 }

 // Utility functions for doing atoi/atof on non-null terminated strings.  On micro benchmarks,
 // this is significantly faster than libc (atoi/strtol and atof/strtod).
 //
 // Strings with leading and trailing whitespaces are accepted: the public string_to_int,
 // string_to_unsigned_int, string_to_float and string_to_bool entry points trim ASCII whitespace
 // from both ends with skip_ascii_whitespaces and then delegate to the matching *_internal routine.
 //
 // For overflows, we are following the mysql behavior, to cap values at the max/min value for that
 // data type.  This is different from hive, which returns NULL for overflow slots for int types
 // and inf/-inf for float types.
 //
 // Things we tried that did not work:
 //  - lookup table for converting character to digit
 // Improvements (TODO):
 //  - Validate input using _simd_compare_ranges
 //  - Since we know the length, we can parallelize this: i.e. result = 100*s[0] + 10*s[1] + s[2]
 class StringParser {
 public:
     // Outcome reported through the `result` out-parameter of every parsing routine.
     enum ParseResult { PARSE_SUCCESS = 0, PARSE_FAILURE, PARSE_OVERFLOW, PARSE_UNDERFLOW };

     // Returns the minimum (negative == true) or maximum (negative == false) value of T.
     // __int128 uses MIN_INT128 / MAX_INT128; every other type goes through std::numeric_limits<T>.
     template <typename T>
     static T numeric_limits(bool negative) {
         if constexpr (std::is_same_v<T, __int128>) {
             return negative ? MIN_INT128 : MAX_INT128;
         } else {
             return negative ? std::numeric_limits<T>::min() : std::numeric_limits<T>::max();
         }
     }

     // Returns the power of ten for `scale` in type T by dispatching to the width-specific
     // common::exp10_* helper.
     template <typename T>
     static T get_scale_multiplier(int scale) {
         static_assert(std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t> ||
                               std::is_same_v<T, __int128> || std::is_same_v<T, wide::Int256>,
                       "You can only instantiate as int32_t, int64_t, __int128 or wide::Int256.");
         if constexpr (std::is_same_v<T, int32_t>) {
             return common::exp10_i32(scale);
         } else if constexpr (std::is_same_v<T, int64_t>) {
             return common::exp10_i64(scale);
         } else if constexpr (std::is_same_v<T, __int128>) {
             return common::exp10_i128(scale);
         } else if constexpr (std::is_same_v<T, wide::Int256>) {
             return common::exp10_i256(scale);
         }
     }

     // This is considerably faster than glibc's implementation (25x).
     // Assumes s represents a decimal number. Surrounding whitespace is trimmed first.
     template <typename T, bool enable_strict_mode = false>
     static inline T string_to_int(const char* __restrict s, size_t len, ParseResult* result) {
         s = skip_ascii_whitespaces(s, len);
         return string_to_int_internal<T, enable_strict_mode>(s, len, result);
     }

     // This is considerably faster than glibc's implementation.
     // In the case of overflow, the max/min value for the data type will be returned.
     // Assumes s represents a decimal number. Surrounding whitespace is trimmed first.
     template <typename T>
     static inline T string_to_unsigned_int(const char* __restrict s, int len, ParseResult* result) {
         s = skip_ascii_whitespaces(s, len);
         return string_to_unsigned_int_internal<T>(s, len, result);
     }

     // Convert a string s representing a number in given base into a decimal number.
     // Surrounding whitespace is trimmed first.
     template <typename T>
     static inline T string_to_int(const char* __restrict s, int64_t len, int base,
                                   ParseResult* result) {
         s = skip_ascii_whitespaces(s, len);
         return string_to_int_internal<T>(s, len, base, result);
     }

     // Parses a floating point value after trimming surrounding whitespace.
     template <typename T>
     static inline T string_to_float(const char* __restrict s, size_t len, ParseResult* result) {
         s = skip_ascii_whitespaces(s, len);
         return string_to_float_internal<T>(s, len, result);
     }

     // Parses a boolean literal after trimming surrounding whitespace; see
     // string_to_bool_internal for the accepted spellings.
     static inline bool string_to_bool(const char* __restrict s, size_t len, ParseResult* result) {
         s = skip_ascii_whitespaces(s, len);
         return string_to_bool_internal(s, len, result);
     }

     // Declared here only; the definition lives outside this class body.
     template <PrimitiveType P>
     static typename PrimitiveTypeTraits<P>::CppType::NativeType string_to_decimal(
             const char* __restrict s, size_t len, int type_precision, int type_scale,
             ParseResult* result);

     // Splits `base` into key/value pairs and inserts them into `*result`.
     // Elements are separated by `element_separator`; within an element the key ends at the first
     // `key_value_separator` and the value starts at the first following character that is not
     // part of the key/value separator. Scanning stops at the first element without a separator
     // or without a value. Keys already present in `*result` are left untouched (std::map::insert).
     // Always returns Status::OK().
     template <typename T>
     static Status split_string_to_map(const std::string& base, const T element_separator,
                                       const T key_value_separator,
                                       std::map<std::string, std::string>* result) {
         // Positions are kept in the string's own size_type so that comparisons against npos are
         // exact instead of relying on int truncation and re-widening.
         using size_type = std::string::size_type;
         constexpr size_type npos = std::string::npos;

         size_type key_pos = 0;
         while (true) {
             const size_type key_end = base.find(key_value_separator, key_pos);
             if (key_end == npos) {
                 break;
             }
             const size_type val_pos = base.find_first_not_of(key_value_separator, key_end);
             if (val_pos == npos) {
                 break;
             }
             size_type val_end = base.find(element_separator, val_pos);
             if (val_end == npos) {
                 val_end = base.size();
             }
             result->insert(std::make_pair(base.substr(key_pos, key_end - key_pos),
                                           base.substr(val_pos, val_end - val_pos)));
             // Continue one position past the end of the value. When the value ran to the end of
             // the string this is size() + 1, for which find() returns npos and ends the loop.
             key_pos = val_end + 1;
         }

         return Status::OK();
     }

     // This is considerably faster than glibc's implementation.
     // In the case of overflow, the max/min value for the data type will be returned.
     // Assumes s represents a decimal number.
     // Return PARSE_FAILURE on leading whitespace. Trailing whitespace is allowed.
     template <typename T, bool enable_strict_mode = false>
     static inline T string_to_int_internal(const char* __restrict s, int len, ParseResult* result);

     // This is considerably faster than glibc's implementation.
     // In the case of overflow, the max/min value for the data type will be returned.
     // Assumes s represents a decimal number.
     // Return PARSE_FAILURE on leading whitespace. Trailing whitespace is allowed.
     template <typename T>
     static inline T string_to_unsigned_int_internal(const char* __restrict s, int len,
                                                     ParseResult* result);

     // Convert a string s representing a number in given base into a decimal number.
     // Return PARSE_FAILURE on leading whitespace. Trailing whitespace is allowed.
     template <typename T>
     static inline T string_to_int_internal(const char* __restrict s, int64_t len, int base,
                                            ParseResult* result);

     // Converts an ascii string to an integer of type T assuming it cannot overflow
     // and the number is positive.
     // Leading whitespace is not allowed. Trailing whitespace will be skipped.
     template <typename T, bool enable_strict_mode = false>
     static inline T string_to_int_no_overflow(const char* __restrict s, int len,
                                               ParseResult* result);

     // zero length, or at least one legal digit. at most consume MAX_LEN digits and stop. or stop when next
     // char is not a digit.
     template <typename T>
     static inline T string_to_uint_greedy_no_overflow(const char* __restrict s, int max_len,
                                                       ParseResult* result);

     // Parses a floating point number. The implementation skips leading and trailing ASCII
     // whitespace itself, accepts one leading '+', and delegates the conversion to
     // fast_float::from_chars; the whole remaining text must be consumed for success.
     template <typename T>
     static inline T string_to_float_internal(const char* __restrict s, int len,
                                              ParseResult* result);

     // Parses a boolean literal, case insensitive. The implementation matches on the exact
     // length, so the input must already be trimmed: any surrounding whitespace yields
     // PARSE_FAILURE.
     static inline bool string_to_bool_internal(const char* __restrict s, int len,
                                                ParseResult* result);

     // Returns true if s only contains whitespace (also true when len <= 0).
     static inline bool is_all_whitespace(const char* __restrict s, int len) {
         for (int i = 0; i < len; ++i) {
             if (!LIKELY(is_whitespace_ascii(s[i]))) {
                 return false;
             }
         }
         return true;
     }

     // For strings like "3.0", "3.123", and "3.", can parse them as 3.
     // `s` points at the candidate suffix: a '.' followed only by digits (possibly none).
     // An empty suffix (len <= 0) is not a float suffix and `s` is not dereferenced for it.
     static inline bool is_float_suffix(const char* __restrict s, int len) {
         return len > 0 && s[0] == '.' && is_all_digit(s + 1, len - 1);
     }

     // Returns true if s only contains the characters '0'..'9' (also true when len <= 0).
     static inline bool is_all_digit(const char* __restrict s, int len) {
         for (int i = 0; i < len; ++i) {
             if (!LIKELY(s[i] >= '0' && s[i] <= '9')) {
                 return false;
             }
         }
         return true;
     }
 }; // end of class StringParser

 // Base-10 signed integer parser with overflow detection.
 // `s` must start directly with an optional '+'/'-' or a digit; `len` is the number of characters.
 // Writes the outcome to `*result`:
 //  - PARSE_FAILURE: empty input, a lone sign, no leading digit, or trailing characters that are
 //    neither all whitespace nor (in non-strict mode only) a float suffix such as ".25".
 //  - PARSE_OVERFLOW: the magnitude does not fit; the extreme value for the sign is returned.
 //  - PARSE_SUCCESS: otherwise.
 // Returns 0 on failure.
 template <typename T, bool enable_strict_mode>
 T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) {
     if (UNLIKELY(len <= 0)) {
         *result = PARSE_FAILURE;
         return 0;
     }

     // The magnitude is accumulated in the unsigned counterpart of T.
     using UnsignedT = MakeUnsignedT<T>;
     UnsignedT val = 0;
     // Largest magnitude allowed: max(T) for positive input, max(T) + 1 for negative input.
     UnsignedT max_val = StringParser::numeric_limits<T>(false);
     bool negative = false;
     int i = 0;
     switch (*s) {
     case '-':
         negative = true;
         max_val += 1;
         [[fallthrough]];
     case '+':
         ++i;
         // only one '+'/'-' char, so could return failure directly
         if (UNLIKELY(len == 1)) {
             *result = PARSE_FAILURE;
             return 0;
         }
     }

     // This is the fast path where the string cannot overflow.
     // string_to_int_no_overflow sets *result itself and returns 0 on failure.
     if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<T>())) {
         val = string_to_int_no_overflow<UnsignedT, enable_strict_mode>(s + i, len - i, result);
         return static_cast<T>(negative ? -val : val);
     }

     // Precomputed pieces of the bound used by the per-digit overflow test below.
     const T max_div_10 = max_val / 10;
     const T max_mod_10 = max_val % 10;

     // Index of the first character after the optional sign.
     int first = i;
     for (; i < len; ++i) {
         if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
             T digit = s[i] - '0';
             // This is a tricky check to see if adding this digit will cause an overflow.
             if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) {
                 *result = PARSE_OVERFLOW;
                 return negative ? -max_val : max_val;
             }
             val = val * 10 + digit;
         } else {
             if constexpr (enable_strict_mode) {
                 if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) {
                     // Reject the string because the remaining chars are not all whitespace
                     *result = PARSE_FAILURE;
                     return 0;
                 }
             } else {
                 if ((UNLIKELY(i == first || (!is_all_whitespace(s + i, len - i) &&
                                              !is_float_suffix(s + i, len - i))))) {
                     // Reject the string because either the first char was not a digit,
                     // or the remaining chars are not all whitespace
                     *result = PARSE_FAILURE;
                     return 0;
                 }
             }
             // Returning here is slightly faster than breaking the loop.
             *result = PARSE_SUCCESS;
             return static_cast<T>(negative ? -val : val);
         }
     }
     *result = PARSE_SUCCESS;
     return static_cast<T>(negative ? -val : val);
 }

 // Base-10 unsigned integer parser with overflow detection. No sign character is accepted.
 // Writes the outcome to `*result`:
 //  - PARSE_FAILURE: empty input, no leading digit, or trailing characters that are not all
 //    whitespace (0 is returned).
 //  - PARSE_OVERFLOW: the value does not fit into T; the maximum of T is returned.
 //  - PARSE_SUCCESS: otherwise.
 template <typename T>
 T StringParser::string_to_unsigned_int_internal(const char* __restrict s, int len,
                                                 ParseResult* result) {
     if (UNLIKELY(len <= 0)) {
         *result = PARSE_FAILURE;
         return 0;
     }

     // This is the fast path where the string cannot overflow. The length bound of the signed
     // counterpart of T is used for the comparison.
     using signedT = MakeSignedT<T>;
     if (LIKELY(len < vectorized::NumberTraits::max_ascii_len<signedT>())) {
         return string_to_int_no_overflow<T>(s, len, result);
     }

     const T limit = std::numeric_limits<T>::max();
     const T limit_div_10 = limit / 10;
     const T limit_mod_10 = limit % 10;

     T acc = 0;
     for (int pos = 0; pos < len; ++pos) {
         const char c = s[pos];
         if (UNLIKELY(c < '0' || c > '9')) {
             // First non-digit: acceptable only after at least one digit and only when the
             // rest of the input is whitespace.
             if (UNLIKELY(pos == 0 || !is_all_whitespace(s + pos, len - pos))) {
                 *result = PARSE_FAILURE;
                 return 0;
             }
             *result = PARSE_SUCCESS;
             return acc;
         }

         const T digit = c - '0';
         // This is a tricky check to see if adding this digit will cause an overflow.
         if (UNLIKELY(acc > (limit_div_10 - (digit > limit_mod_10)))) {
             *result = PARSE_OVERFLOW;
             return limit;
         }
         acc = acc * 10 + digit;
     }
     *result = PARSE_SUCCESS;
     return acc;
 }

 // Parses a signed integer written in the given `base`, with overflow detection.
 // Digits are '0'-'9' followed by letters (case-insensitive, 'a' == 10 ... 'z' == 35).
 // `s` must start directly with an optional '+'/'-' or a digit/letter.
 // Writes the outcome to `*result`:
 //  - PARSE_FAILURE: empty input, `base == 0`, a first character after the sign that is neither a
 //    digit nor a letter, or a non-alphanumeric tail that is not all whitespace (0 is returned).
 //  - PARSE_OVERFLOW: the magnitude does not fit; the extreme value for the sign is returned.
 //  - PARSE_SUCCESS: otherwise. Parsing stops silently at the first digit that is not valid in
 //    `base`, and the value accumulated so far is returned.
 template <typename T>
 T StringParser::string_to_int_internal(const char* __restrict s, int64_t len, int base,
                                        ParseResult* result) {
     // A zero base would divide by zero when the overflow bounds are computed below.
     if (UNLIKELY(len <= 0 || base == 0)) {
         *result = PARSE_FAILURE;
         return 0;
     }

     using UnsignedT = MakeUnsignedT<T>;
     UnsignedT val = 0;
     // Largest magnitude allowed: max(T) for positive input, max(T) + 1 for negative input.
     UnsignedT max_val = StringParser::numeric_limits<T>(false);
     bool negative = false;
     int64_t i = 0;
     switch (*s) {
     case '-':
         negative = true;
         // Increment the unsigned copy: computing max(T) + 1 in the signed type T overflows.
         max_val += 1;
         [[fallthrough]];
     case '+':
         i = 1;
     }

     const T max_div_base = max_val / base;
     const T max_mod_base = max_val % base;

     // Index of the first character after the optional sign.
     const int64_t first = i;
     for (; i < len; ++i) {
         T digit;
         if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
             digit = s[i] - '0';
         } else if (s[i] >= 'a' && s[i] <= 'z') {
             digit = (s[i] - 'a' + 10);
         } else if (s[i] >= 'A' && s[i] <= 'Z') {
             digit = (s[i] - 'A' + 10);
         } else {
             if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) {
                 // Reject the string because either the first char was not an alpha/digit,
                 // or the remaining chars are not all whitespace
                 *result = PARSE_FAILURE;
                 return 0;
             }
             // skip trailing whitespace.
             break;
         }

         // Bail, if we encounter a digit that is not available in base.
         if (digit >= base) {
             break;
         }

         // This is a tricky check to see if adding this digit will cause an overflow.
         if (UNLIKELY(val > (max_div_base - (digit > max_mod_base)))) {
             *result = PARSE_OVERFLOW;
             return static_cast<T>(negative ? -max_val : max_val);
         }
         val = val * base + digit;
     }
     *result = PARSE_SUCCESS;
     return static_cast<T>(negative ? -val : val);
 }

 // Accumulates a non-negative base-10 number without any overflow checking; callers must make
 // sure the text is short enough to fit into T.
 //  - An empty input yields 0 with PARSE_SUCCESS.
 //  - The first character must be a digit, otherwise PARSE_FAILURE.
 //  - At the first non-digit the rest must be all whitespace, or (non-strict mode only) a float
 //    suffix such as ".25"; otherwise PARSE_FAILURE. The digits read so far are the result.
 // Returns 0 on failure.
 template <typename T, bool enable_strict_mode>
 T StringParser::string_to_int_no_overflow(const char* __restrict s, int len, ParseResult* result) {
     T acc = 0;
     if (UNLIKELY(len == 0)) {
         *result = PARSE_SUCCESS;
         return acc;
     }

     // Factor out the first char for error handling speeds up the loop.
     const char head = s[0];
     if (UNLIKELY(head < '0' || head > '9')) {
         *result = PARSE_FAILURE;
         return 0;
     }
     acc = head - '0';

     for (int pos = 1; pos < len; ++pos) {
         const char c = s[pos];
         if (LIKELY(c >= '0' && c <= '9')) {
             T digit = c - '0';
             acc = acc * 10 + digit;
             continue;
         }

         // Non-digit reached: decide whether the tail is acceptable.
         bool tail_ok = is_all_whitespace(s + pos, len - pos);
         if constexpr (!enable_strict_mode) {
             tail_ok = tail_ok || is_float_suffix(s + pos, len - pos);
         }
         if (UNLIKELY(!tail_ok)) {
             *result = PARSE_FAILURE;
             return 0;
         }
         *result = PARSE_SUCCESS;
         return acc;
     }
     *result = PARSE_SUCCESS;
     return acc;
 }

 // Greedy, unchecked digit accumulation over at most `max_len` characters.
 //  - max_len == 0 yields 0 with PARSE_SUCCESS.
 //  - The first character must be a digit, otherwise PARSE_FAILURE (0 is returned).
 //  - Reading stops at the first non-digit or after `max_len` characters; whatever was read is
 //    returned with PARSE_SUCCESS (e.g. "123abc" gives 123).
 template <typename T>
 T StringParser::string_to_uint_greedy_no_overflow(const char* __restrict s, int max_len,
                                                   ParseResult* result) {
     T acc = 0;
     if (max_len == 0) [[unlikely]] {
         *result = PARSE_SUCCESS;
         return acc;
     }
     // Factor out the first char for error handling speeds up the loop.
     if (!is_numeric_ascii(s[0])) [[unlikely]] {
         *result = PARSE_FAILURE;
         return 0;
     }
     acc = s[0] - '0';

     int pos = 1;
     while (pos < max_len && is_numeric_ascii(s[pos])) {
         T digit = s[pos] - '0';
         acc = acc * 10 + digit;
         ++pos;
     }
     *result = PARSE_SUCCESS;
     return acc;
 }

 // Parses a floating point number from [s, s + len).
 // Leading and trailing ASCII whitespace is skipped, one leading '+' is accepted, and the rest is
 // handed to fast_float::from_chars. PARSE_SUCCESS requires from_chars to consume everything up to
 // the last non-whitespace character; in every other case *result is PARSE_FAILURE and 0 is
 // returned. The value is parsed as double and converted to T on return.
 template <typename T>
 T StringParser::string_to_float_internal(const char* __restrict s, int len, ParseResult* result) {
     // Index of the first non-whitespace character (== len when there is none).
     int begin = 0;
     while (begin < len && is_whitespace_ascii(s[begin])) {
         ++begin;
     }

     // Index of the last non-whitespace character (== begin - 1 when there is none).
     int last = len - 1;
     while (last >= begin && is_whitespace_ascii(s[last])) {
         --last;
     }

     // skip leading '+', from_chars can handle '-'
     if (begin < len && s[begin] == '+') {
         ++begin;
         // ++ or +- are not valid, but the first + is already skipped,
         // if don't check here, from_chars will succeed.
         //
         // New version of fast_float supports a new flag called 'chars_format::allow_leading_plus'
         // which may avoid this extra check here.
         // e.g.:
         // fast_float::chars_format format =
         //         fast_float::chars_format::general | fast_float::chars_format::allow_leading_plus;
         // auto res = fast_float::from_chars(s + i, s + j + 1, val, format);
         if (begin < len && (s[begin] == '+' || s[begin] == '-')) {
             *result = PARSE_FAILURE;
             return 0;
         }
     }
     // Nothing left to parse (empty, all whitespace, or a lone '+').
     if (UNLIKELY(begin > last)) {
         *result = PARSE_FAILURE;
         return 0;
     }

     // Use double here to not lose precision while accumulating the result
     double parsed = 0;
     const char* const stop = s + last + 1;
     const auto outcome = fast_float::from_chars(s + begin, stop, parsed);

     // Any unconsumed character means the text is not a valid number.
     if (outcome.ptr != stop) {
         *result = PARSE_FAILURE;
         return 0;
     }
     *result = PARSE_SUCCESS;
     return parsed;
 }

 // Parses a boolean literal of exactly `len` characters (no whitespace is skipped here).
 // Accepted spellings, letters being case-insensitive:
 //   true : "1", "t", "on", "yes", "true"
 //   false: "0", "f", "no", "off", "false"
 // Sets *result to PARSE_SUCCESS for these and to PARSE_FAILURE (returning false) for anything else.
 inline bool StringParser::string_to_bool_internal(const char* __restrict s, int len,
                                                   ParseResult* result) {
     // Compares the `len` input characters with a lowercase ASCII word of the same length,
     // accepting either the lowercase or the uppercase form of each letter.
     const auto matches = [&](const char* lower_word) {
         for (int k = 0; k < len; ++k) {
             const char lower = lower_word[k];
             const char upper = static_cast<char>(lower - ('a' - 'A'));
             if (s[k] != lower && s[k] != upper) {
                 return false;
             }
         }
         return true;
     };

     bool value = false;
     bool recognized = false;
     switch (len) {
     case 1:
         switch (s[0]) {
         case '1':
         case 't':
         case 'T':
             value = true;
             recognized = true;
             break;
         case '0':
         case 'f':
         case 'F':
             recognized = true;
             break;
         default:
             break;
         }
         break;
     case 2:
         if (matches("on")) {
             value = true;
             recognized = true;
         } else if (matches("no")) {
             recognized = true;
         }
         break;
     case 3:
         if (matches("yes")) {
             value = true;
             recognized = true;
         } else if (matches("off")) {
             recognized = true;
         }
         break;
     case 4:
         if (matches("true")) {
             value = true;
             recognized = true;
         }
         break;
     case 5:
         recognized = matches("false");
         break;
     default:
         // No valid boolean value has this length.
         break;
     }

     *result = recognized ? PARSE_SUCCESS : PARSE_FAILURE;
     return value;
 }
 #include "common/compile_check_avoid_end.h"
 } // end namespace doris