| // Licensed to the Apache Software Foundation (ASF) under one |
| // or more contributor license agreements. See the NOTICE file |
| // distributed with this work for additional information |
| // regarding copyright ownership. The ASF licenses this file |
| // to you under the Apache License, Version 2.0 (the |
| // "License"); you may not use this file except in compliance |
| // with the License. You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, |
| // software distributed under the License is distributed on an |
| // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| // KIND, either express or implied. See the License for the |
| // specific language governing permissions and limitations |
| // under the License. |
| |
| #include "util/string_parser.hpp" |
| |
| #include <limits> |
| |
| #include "vec/core/extended_types.h" |
| namespace doris { |
| // Supported decimal number format: |
| // <decimal> ::= <whitespace>* <value> <whitespace>* |
| // |
| // <whitespace> ::= " " | "\t" | "\n" | "\r" | "\f" | "\v" |
| // |
| // <value> ::= <sign>? <significand> <exponent>? |
| // |
| // <sign> ::= "+" | "-" |
| // |
| // <significand> ::= <digits> "." <digits> | <digits> | <digits> "." | "." <digits> |
| // |
| // <digits> ::= <digit>+ |
| // |
| // <digit> ::= "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" |
| // |
| // <exponent> ::= <e_marker> <sign>? <digits> |
| // |
| // <e_marker> ::= "e" | "E" |
| template <PrimitiveType P> |
| typename PrimitiveTypeTraits<P>::CppType::NativeType StringParser::string_to_decimal( |
| const char* __restrict s, int len, int type_precision, int type_scale, |
| ParseResult* result) { |
| using T = typename PrimitiveTypeTraits<P>::CppType::NativeType; |
| static_assert(std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t> || |
| std::is_same_v<T, __int128> || std::is_same_v<T, wide::Int256>, |
| "Cast string to decimal only support target type int32_t, int64_t, __int128 or " |
| "wide::Int256."); |
| // Ignore leading and trailing spaces. |
| s = skip_ascii_whitespaces(s, len); |
| |
| bool is_negative = false; |
| if (len > 0) { |
| switch (*s) { |
| case '-': |
| is_negative = true; |
| [[fallthrough]]; |
| case '+': |
| ++s; |
| --len; |
| } |
| } |
| // Ignore leading zeros. |
| bool found_value = false; |
| while (len > 0 && UNLIKELY(*s == '0')) { |
| found_value = true; |
| ++s; |
| --len; |
| } |
| |
| int found_dot = 0; |
| if (len > 0 && *s == '.') { |
| found_dot = 1; |
| ++s; |
| --len; |
| } |
| int int_part_count = 0; |
| std::vector<unsigned char> digits; |
| if (len > 0) { |
| digits.resize(len); |
| } |
| int total_digit_count = 0; |
| int i = 0; |
| for (; i != len; ++i) { |
| const char& c = s[i]; |
| if (LIKELY('0' <= c && c <= '9')) { |
| found_value = true; |
| digits[total_digit_count++] = c - '0'; |
| if (!found_dot) { |
| ++int_part_count; |
| } |
| } else if (c == '.') { |
| if (found_dot) { |
| *result = StringParser::PARSE_FAILURE; |
| return 0; |
| } |
| found_dot = 1; |
| } else { |
| break; |
| } |
| } |
| if (!found_value) { |
| // '', '.' |
| *result = StringParser::PARSE_FAILURE; |
| return 0; |
| } |
| // parse exponent if any |
| int64_t exponent = 0; |
| if (i != len) { |
| bool negative_exponent = false; |
| if (s[i] == 'e' || s[i] == 'E') { |
| ++i; |
| if (i != len) { |
| switch (s[i]) { |
| case '-': |
| negative_exponent = true; |
| [[fallthrough]]; |
| case '+': |
| ++i; |
| } |
| } |
| if (i == len) { |
| // '123e', '123e+', '123e-' |
| *result = StringParser::PARSE_FAILURE; |
| return 0; |
| } |
| for (; i != len; ++i) { |
| const char& c = s[i]; |
| if (LIKELY('0' <= c && c <= '9')) { |
| exponent = exponent * 10 + (c - '0'); |
| // max string len is config::string_type_length_soft_limit_bytes, |
| // whose max value is std::numeric_limits<int32_t>::max() - 4, |
| // just check overflow of int32_t to simplify the logic |
| // For edge cases like 0.{2147483647 zeros}e+2147483647 |
| if (exponent > std::numeric_limits<int32_t>::max()) { |
| *result = StringParser::PARSE_OVERFLOW; |
| return 0; |
| } |
| } else { |
| // '123e12abc', '123e1.2' |
| *result = StringParser::PARSE_FAILURE; |
| return 0; |
| } |
| } |
| if (negative_exponent) { |
| exponent = -exponent; |
| } |
| } else { |
| *result = StringParser::PARSE_FAILURE; |
| return 0; |
| } |
| } |
| T int_part_number = 0; |
| T frac_part_number = 0; |
| // TODO: check limit values of exponent and add UT |
| // max string len is config::string_type_length_soft_limit_bytes, |
| // whose max value is std::numeric_limits<int32_t>::max() - 4, |
| // so int_part_count will be in range of int32_t, |
| // and int_part_count + exponent will be in range of int64_t |
| int64_t tmp_actual_int_part_count = int_part_count + exponent; |
| if (tmp_actual_int_part_count > std::numeric_limits<int>::max() || |
| tmp_actual_int_part_count < std::numeric_limits<int>::min()) { |
| *result = StringParser::PARSE_OVERFLOW; |
| return 0; |
| } |
| int actual_int_part_count = tmp_actual_int_part_count; |
| int actual_frac_part_count = 0; |
| int digit_index = 0; |
| if (actual_int_part_count >= 0) { |
| int max_index = std::min(actual_int_part_count, total_digit_count); |
| // skip zero number |
| for (; digit_index != max_index && digits[digit_index] == 0; ++digit_index) { |
| } |
| // test 0.00, .00, 0.{00...}e2147483647 |
| // 0.00000e2147483647 |
| if (max_index - digit_index > type_precision - type_scale) { |
| *result = is_negative ? StringParser::PARSE_UNDERFLOW : StringParser::PARSE_OVERFLOW; |
| return 0; |
| } |
| // get int part number |
| for (; digit_index != max_index; ++digit_index) { |
| int_part_number = int_part_number * 10 + digits[digit_index]; |
| } |
| if (digit_index != actual_int_part_count) { |
| int_part_number *= get_scale_multiplier<T>(actual_int_part_count - digit_index); |
| } |
| } else { |
| // leading zeros of fraction part |
| actual_frac_part_count = -actual_int_part_count; |
| } |
| // get fraction part number |
| for (; digit_index != total_digit_count && actual_frac_part_count < type_scale; |
| ++digit_index, ++actual_frac_part_count) { |
| frac_part_number = frac_part_number * 10 + digits[digit_index]; |
| } |
| auto type_scale_multiplier = get_scale_multiplier<T>(type_scale); |
| // there are still extra fraction digits left, check rounding |
| if (digit_index != total_digit_count) { |
| // example: test 1.5 -> decimal(1, 0) |
| if (digits[digit_index] >= 5) { |
| ++frac_part_number; |
| if (frac_part_number == type_scale_multiplier) { |
| frac_part_number = 0; |
| ++int_part_number; |
| } |
| } |
| } else { |
| if (actual_frac_part_count < type_scale) { |
| frac_part_number *= get_scale_multiplier<T>(type_scale - actual_frac_part_count); |
| } |
| } |
| if (int_part_number >= get_scale_multiplier<T>(type_precision - type_scale)) { |
| *result = is_negative ? StringParser::PARSE_UNDERFLOW : StringParser::PARSE_OVERFLOW; |
| return 0; |
| } |
| |
| T value = int_part_number * type_scale_multiplier + frac_part_number; |
| *result = StringParser::PARSE_SUCCESS; |
| return is_negative ? T(-value) : T(value); |
| } |
| template vectorized::Int32 StringParser::string_to_decimal<PrimitiveType::TYPE_DECIMAL32>( |
| const char* __restrict s, int len, int type_precision, int type_scale, ParseResult* result); |
| template vectorized::Int64 StringParser::string_to_decimal<PrimitiveType::TYPE_DECIMAL64>( |
| const char* __restrict s, int len, int type_precision, int type_scale, ParseResult* result); |
| template vectorized::Int128 StringParser::string_to_decimal<PrimitiveType::TYPE_DECIMAL128I>( |
| const char* __restrict s, int len, int type_precision, int type_scale, ParseResult* result); |
| template vectorized::Int128 StringParser::string_to_decimal<PrimitiveType::TYPE_DECIMALV2>( |
| const char* __restrict s, int len, int type_precision, int type_scale, ParseResult* result); |
| template wide::Int256 StringParser::string_to_decimal<PrimitiveType::TYPE_DECIMAL256>( |
| const char* __restrict s, int len, int type_precision, int type_scale, ParseResult* result); |
| } // end namespace doris |