// be/src/vec/common/format_ip.h - doris - Git at Google

 // Licensed to the Apache Software Foundation (ASF) under one
 // or more contributor license agreements.  See the NOTICE file
 // distributed with this work for additional information
 // regarding copyright ownership.  The ASF licenses this file
 // to you under the Apache License, Version 2.0 (the
 // "License"); you may not use this file except in compliance
 // with the License.  You may obtain a copy of the License at
 //
 //   http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing,
 // software distributed under the License is distributed on an
 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 // KIND, either express or implied.  See the License for the
 // specific language governing permissions and limitations
 // under the License.
 // This file is copied from
 // https://github.com/ClickHouse/ClickHouse/blob/master/src/Common/formatIPv6.h
 // and modified by Doris

 #pragma once

 #include <vec/common/hex.h>
 #include <vec/common/string_utils/string_utils.h>
 #include <vec/core/types.h>

 #include <algorithm>
 #include <array>
 #include <bit>
 #include <cstdint>
 #include <cstring>
 #include <utility>

 /// Size in bytes of a binary IPv4 address.
 constexpr size_t IPV4_BINARY_LENGTH = 4;
 /// Length of the longest dotted-quad text, i.e. "255.255.255.255".
 constexpr size_t IPV4_MAX_TEXT_LENGTH = 15; /// Does not count tail zero byte.
 /// Length of an IPv6 text made of eight 4-digit hex groups and seven ':' separators.
 constexpr size_t IPV6_MAX_TEXT_LENGTH = 39;
 constexpr size_t IPV4_MIN_NUM_VALUE = 0;          // numeric value of '0.0.0.0'
 constexpr size_t IPV4_MAX_NUM_VALUE = 4294967295; // numeric value of '255.255.255.255'
 constexpr int IPV4_MAX_OCTET_VALUE = 255;         // largest value a single octet can hold
 /// Number of bits in one IPv4 octet; used as the shift step while parsing.
 constexpr size_t IPV4_OCTET_BITS = 8;
 /// Radix used when accumulating the decimal digits of an octet.
 constexpr size_t DECIMAL_BASE = 10;
 /// Size in bytes of a binary IPv6 address.
 constexpr size_t IPV6_BINARY_LENGTH = 16;

 namespace doris::vectorized {
 #include "common/compile_check_begin.h"

 /// Lookup table indexed by a byte value (0..255). format_ipv4 copies `.second` characters
 /// starting at `.first` to print an octet. Only declared here; the definition is not part of
 /// this header.
 extern const std::array<std::pair<const char*, size_t>, 256> one_byte_to_string_lookup_table;

 /** Format a 4-byte binary sequence as IPv4 text: 'aaa.bbb.ccc.ddd'.
  * The value is read in host byte order (the leading octet comes from src[3] on little-endian
  * hosts and from src[0] on big-endian hosts), that is 0x7f000001 => "127.0.0.1".
  *
  * Any number of the tail octets can be replaced with the given mask string.
  * No terminating zero byte is written; on return `dst` points just past the last character.
  * Nothing is written beyond that position.
  *
  * @param src              - binary address.
  * @param src_size         - number of significant bytes; leading octets that are not covered
  *                           are printed as "0". Values above IPV4_BINARY_LENGTH are clamped.
  * @param dst              - output cursor (reference to pointer), advanced past the text.
  *                           Must have room for IPV4_MAX_TEXT_LENGTH characters when the mask
  *                           string is at most three characters long.
  * @param mask_tail_octets - how many trailing octets are replaced by `mask_string`.
  *                           Values above IPV4_BINARY_LENGTH are clamped.
  * @param mask_string      - replacement text; a null pointer is treated as an empty string.
  *
  * Examples:
  *     format_ipv4(&0x7f000001, dst, mask_tail_octets = 0, nullptr);
  *         > dst == "127.0.0.1"
  *     format_ipv4(&0x7f000001, dst, mask_tail_octets = 1, "xxx");
  *         > dst == "127.0.0.xxx"
  *     format_ipv4(&0x7f000001, dst, mask_tail_octets = 1, "0");
  *         > dst == "127.0.0.0"
  */
 inline void format_ipv4(const unsigned char* src, size_t src_size, char*& dst,
                         uint8_t mask_tail_octets = 0, const char* mask_string = "xxx") {
     const size_t mask_length = mask_string ? strlen(mask_string) : 0;
     // Clamp both counts so that the size_t subtractions below can never wrap around.
     const size_t masked_octets = std::min<size_t>(mask_tail_octets, IPV4_BINARY_LENGTH);
     const size_t limit = IPV4_BINARY_LENGTH - masked_octets;
     const size_t first_real_octet = IPV4_BINARY_LENGTH - std::min(src_size, IPV4_BINARY_LENGTH);

     for (size_t octet = 0; octet < IPV4_BINARY_LENGTH; ++octet) {
         // The separator goes *before* every component except the first one, so no byte is
         // ever written past the end of the formatted address.
         if (octet != 0) {
             *dst++ = '.';
         }

         if (octet >= limit) {
             // Masked tail octet.
             if (mask_length != 0) {
                 memcpy(dst, mask_string, mask_length);
                 dst += mask_length;
             }
             continue;
         }

         if (octet < first_real_octet) {
             // Octet not covered by src_size: pad with zero.
             *dst++ = '0';
             continue;
         }

         uint8_t value = 0;
         if constexpr (std::endian::native == std::endian::little) {
             value = static_cast<uint8_t>(src[IPV4_BINARY_LENGTH - octet - 1]);
         } else {
             value = static_cast<uint8_t>(src[octet]);
         }

         const auto& entry = one_byte_to_string_lookup_table[value];
         memcpy(dst, entry.first, entry.second);
         dst += entry.second;
     }
 }

 inline void format_ipv4(const unsigned char* src, char*& dst, uint8_t mask_tail_octets = 0,
                         const char* mask_string = "xxx") {
     format_ipv4(src, 4, dst, mask_tail_octets, mask_string);
 }

 /** Unsafe (no bounds-checking for src nor dst), optimized version of parsing IPv4 string.
  *
  * Parses the input string `src` and stores the binary host-endian value into the buffer
  * pointed to by `dst`. That is "127.0.0.1" becomes 0x7f000001.
  * Exactly IPV4_BINARY_LENGTH bytes are written on success.
  *
  * In case of failure doesn't modify buffer pointed by `dst` (but `src` may have been advanced).
  *
  * Each octet may consist of up to four decimal digits as long as its value does not exceed
  * IPV4_MAX_OCTET_VALUE.
  *
  * WARNING - this function is adapted to work with ReadBuffer, where src is the position reference (ReadBuffer::position())
  *           and eof is the ReadBuffer::eof() - therefore algorithm below does not rely on buffer's continuity.
  *           To parse strings use overloads below.
  *
  * @param src         - iterator (reference to pointer) over input string - warning - continuity is not guaranteed.
  * @param eof         - function returning true if iterator reached the end - warning - can break iterator's continuity.
  * @param dst         - where to put output bytes, expected to be non-null and at least IPV4_BINARY_LENGTH long.
  * @param first_octet - preparsed first octet, or a negative value when there is none.
  * @return            - true if parsed successfully, false otherwise.
  */
 template <typename T, typename EOFfunction>
     requires(std::is_same<typename std::remove_cv<T>::type, char>::value)
 inline bool parse_ipv4(T*& src, EOFfunction eof, unsigned char* dst, int64_t first_octet = -1) {
     if (src == nullptr || first_octet > IPV4_MAX_OCTET_VALUE) {
         return false;
     }

     int64_t result = 0;
     // Bit position of the octet being parsed; starts at the most significant one.
     int offset = (IPV4_BINARY_LENGTH - 1) * IPV4_OCTET_BITS;
     if (first_octet >= 0) {
         result |= first_octet << offset;
         offset -= IPV4_OCTET_BITS;
     }

     // The loop increment (++src) skips the '.' that was verified at the end of the iteration.
     for (; true; offset -= IPV4_OCTET_BITS, ++src) {
         if (eof()) {
             return false;
         }

         int64_t value = 0;
         size_t len = 0;
         while (is_numeric_ascii(*src) && len <= 3) {
             value = value * DECIMAL_BASE + (*src - '0');
             ++len;
             ++src;
             if (eof()) {
                 break;
             }
         }
         // Every octet but the last one has to be followed by a '.'.
         if (len == 0 || value > IPV4_MAX_OCTET_VALUE || (offset > 0 && (eof() || *src != '.'))) {
             return false;
         }
         result |= value << offset;

         if (offset == 0) {
             break;
         }
     }

     // The accumulator is 64 bits wide, but the destination holds only IPV4_BINARY_LENGTH bytes:
     // narrow first so that neither extra bytes are written nor the wrong half is copied.
     const auto packed = static_cast<uint32_t>(result);
     static_assert(sizeof(packed) == IPV4_BINARY_LENGTH);
     memcpy(dst, &packed, sizeof(packed));
     return true;
 }

 /// returns pointer to the right after parsed sequence or null on failed parsing
 inline const char* parse_ipv4(const char* src, const char* end, unsigned char* dst) {
     if (parse_ipv4(
                 src, [&src, end]() { return src == end; }, dst)) {
         return src;
     }
     return nullptr;
 }

 /// Returns true if the whole range [src, end) was parsed successfully as an IPv4 address.
 inline bool parse_ipv4_whole(const char* src, const char* end, unsigned char* dst) {
     const char* const stop = parse_ipv4(src, end, dst);
     // A failed parse yields nullptr; check it explicitly so that a null `end` is never
     // mistaken for "consumed everything".
     return stop != nullptr && stop == end;
 }

 /// returns pointer to the right after parsed sequence or null on failed parsing
 inline const char* parse_ipv4(const char* src, unsigned char* dst) {
     if (parse_ipv4(
                 src, []() { return false; }, dst)) {
         return src;
     }
     return nullptr;
 }

 /// Returns true if the whole null-terminated string is a valid IPv4 address.
 inline bool parse_ipv4_whole(const char* src, unsigned char* dst) {
     const char* const stop = parse_ipv4(src, dst);
     if (stop == nullptr) {
         return false;
     }
     // Success only when the parser stopped exactly at the terminator.
     return *stop == '\0';
 }

 /// Integer logarithm: returns ceil(log(value, base)), i.e. the smallest integer greater than or
 /// equal to log(value, base). `carry` tells whether a non-zero remainder has already been
 /// dropped; callers start with false.
 inline constexpr UInt32 int_log(const UInt32 value, const UInt32 base, const bool carry) {
     UInt32 remaining = value;
     bool dropped_remainder = carry;
     UInt32 steps = 0;

     // Divide down until a single "digit" is left, remembering any remainder lost on the way.
     while (remaining >= base) {
         dropped_remainder = (remaining % base) || dropped_remainder;
         remaining /= base;
         ++steps;
     }

     const bool round_up = remaining % base > 1 || dropped_remainder;
     return steps + (round_up ? 1 : 0);
 }

 /// Writes `value` in the given base at `out` and advances `out` past the digits.
 /// Digits come from hex_digit_lowercase; no terminator is written.
 /// Faster than sprintf. NOTE This is not the best way, see
 /// https://github.com/miloyip/itoa-benchmark - but it doesn't matter here.
 template <UInt32 base, typename T>
 inline void print_integer(char*& out, T value) {
     if (value == 0) {
         *out++ = '0';
         return;
     }

     // Scratch space sized from the digits needed per byte of T.
     constexpr size_t max_digits = sizeof(T) * int_log(256, base, false);
     char digits[max_digits];
     size_t count = 0;

     // Digits are produced least significant first...
     for (; value > 0; value /= base) {
         digits[count++] = hex_digit_lowercase(value % base);
     }

     // ...and emitted in reverse order.
     while (count > 0) {
         *out++ = digits[--count];
     }
 }

 /** Rewritten inet_ntop6 from http://svn.apache.org/repos/asf/apr/apr/trunk/network_io/unix/inet_pton.c
   * performs significantly faster than the reference implementation due to the absence of sprintf calls,
   * bounds checking, unnecessary string copying and length calculation.
   * @param src         - pointer to IPv6 (16 bytes) stored in little-endian byte order
   * @param dst         - where to put format result bytes
   * @param zeroed_tail_bytes_count - the parameter is currently not being used
   */
 inline void format_ipv6(unsigned char* src, char*& dst, uint8_t zeroed_tail_bytes_count = 0) {
     struct {
         Int64 base, len;
     } best {-1, 0}, cur {-1, 0};
     std::array<UInt16, IPV6_BINARY_LENGTH / sizeof(UInt16)> words {};

     // the current function logic is processed in big endian manner
     // but ipv6 in doris is stored in little-endian byte order
     // so transfer to big-endian byte order first
     // compatible with parse_ipv6 function in format_ip.h
     std::reverse(src, src + IPV6_BINARY_LENGTH);

     /** Preprocess:
         *    Copy the input (bytewise) array into a wordwise array.
         *    Find the longest run of 0x00's in src[] for :: shorthanding. */
     for (size_t i = 0; i < (IPV6_BINARY_LENGTH - zeroed_tail_bytes_count); i += 2) {
         words[i / 2] = (uint16_t)(src[i] << 8) | src[i + 1];
     }

     for (size_t i = 0; i < words.size(); i++) {
         if (words[i] == 0) {
             if (cur.base == -1) {
                 cur.base = i;
                 cur.len = 1;
             } else {
                 cur.len++;
             }
         } else {
             if (cur.base != -1) {
                 if (best.base == -1 || cur.len > best.len) {
                     best = cur;
                 }
                 cur.base = -1;
             }
         }
     }

     if (cur.base != -1) {
         if (best.base == -1 || cur.len > best.len) {
             best = cur;
         }
     }
     if (best.base != -1 && best.len < 2) {
         best.base = -1;
     }

     /// Format the result.
     for (size_t i = 0; i < words.size(); i++) {
         /// Are we inside the best run of 0x00's?
         if (best.base != -1) {
             auto best_base = static_cast<size_t>(best.base);
             if (i >= best_base && i < (best_base + best.len)) {
                 if (i == best_base) {
                     *dst++ = ':';
                 }
                 continue;
             }
         }
         /// Are we following an initial run of 0x00s or any real hex?
         if (i != 0) {
             *dst++ = ':';
         }
         /// Is this address an encapsulated IPv4?
         if (i == 6 && best.base == 0 && (best.len == 6 || (best.len == 5 && words[5] == 0xffffu))) {
             uint8_t ipv4_buffer[IPV4_BINARY_LENGTH] = {0};
             memcpy(ipv4_buffer, src + 12, IPV4_BINARY_LENGTH);
             // Due to historical reasons format_ipv4() takes ipv4 in BE format, but inside ipv6 we store it in LE-format.
             if constexpr (std::endian::native == std::endian::little) {
                 std::reverse(std::begin(ipv4_buffer), std::end(ipv4_buffer));
             }
             format_ipv4(ipv4_buffer, dst,
                         std::min(zeroed_tail_bytes_count, static_cast<uint8_t>(IPV4_BINARY_LENGTH)),
                         "0");
             // format_ipv4 has already added a null-terminator for us.
             return;
         }
         print_integer<16>(dst, words[i]);
     }

     /// Was it a trailing run of 0x00's?
     if (best.base != -1 &&
         static_cast<size_t>(best.base) + static_cast<size_t>(best.len) == words.size()) {
         *dst++ = ':';
     }
 }

 /** Unsafe (no bounds-checking for src nor dst), optimized version of parsing IPv6 string.
  *
  * Parses the input string `src` and stores binary little-endian value into buffer pointed by `dst`,
  * which should be long enough. In case of failure zeroes IPV6_BINARY_LENGTH bytes of buffer pointed by `dst`.
  *
  * Groups are first written to `dst` in big-endian order, an optional "::" is expanded by
  * shifting the groups parsed after it to the end of the buffer, and finally the whole buffer
  * is reversed. A trailing dotted IPv4 part is delegated to parse_ipv4.
  *
  * Parsing stops at the first character that cannot continue the address; the function does
  * not itself require that the input has been fully consumed.
  *
  * WARNING - this function is adapted to work with ReadBuffer, where src is the position reference (ReadBuffer::position())
  *           and eof is the ReadBuffer::eof() - therefore algorithm below does not rely on buffer's continuity.
  *           To parse strings use overloads below.
  *
  * @param src         - iterator (reference to pointer) over input string - warning - continuity is not guaranteed.
  * @param eof         - function returning true if iterator reached the end - warning - can break iterator's continuity.
  * @param dst         - where to put output bytes in little-endian byte order, expected to be non-null and at IPV6_BINARY_LENGTH-long.
  * @param first_block - preparsed first block, or a negative value when there is none
  * @return            - true if parsed successfully, false otherwise.
  */
 template <typename T, typename EOFfunction>
     requires(std::is_same<typename std::remove_cv<T>::type, char>::value)
 inline bool parse_ipv6(T*& src, EOFfunction eof, unsigned char* dst, int32_t first_block = -1) {
     /// Common failure path: wipe the output and report false.
     const auto clear_dst = [dst]() {
         std::memset(dst, '\0', IPV6_BINARY_LENGTH);
         return false;
     };

     if (src == nullptr || eof()) return clear_dst();

     int groups = 0;            /// number of parsed groups
     unsigned char* iter = dst; /// iterator over dst buffer
     unsigned char* zptr =
             nullptr; /// pointer into dst buffer array where all-zeroes block ("::") is started

     std::memset(dst, '\0', IPV6_BINARY_LENGTH);

     if (first_block >= 0) {
         /// The caller already consumed the first group: store it high byte first.
         *iter++ = static_cast<unsigned char>((first_block >> 8) & 0xffu);
         *iter++ = static_cast<unsigned char>(first_block & 0xffu);
         if (*src == ':') {
             zptr = iter;
             ++src;
         }
         ++groups;
     }

     /// True when the next characters are expected to start a new group.
     bool group_start = true;

     while (!eof() && groups < 8) {
         if (*src == ':') {
             ++src;
             if (eof()) /// trailing colon is not allowed
                 return clear_dst();

             group_start = true;

             if (*src == ':') {
                 if (zptr != nullptr) /// multiple all-zeroes blocks are not allowed
                     return clear_dst();
                 zptr = iter;
                 ++src;
                 continue;
             }
             if (groups == 0) /// leading colon is not allowed
                 return clear_dst();
         }

         /// mixed IPv4 parsing
         if (*src == '.') {
             if (groups <= 1 && zptr == nullptr) /// IPv4 block can't be the first
                 return clear_dst();

             if (group_start) /// first octet of IPv4 should be already parsed as an IPv6 group
                 return clear_dst();

             ++src;
             if (eof()) return clear_dst();

             /// last parsed group should be reinterpreted as a decimal value - it's the first octet of IPv4
             --groups;
             iter -= 2;

             /// Each nibble of the stored group is one decimal digit of the octet.
             UInt16 num = 0;
             for (int i = 0; i < 2; ++i) {
                 unsigned char first = (iter[i] >> 4) & 0x0fu;
                 unsigned char second = iter[i] & 0x0fu;
                 if (first > 9 || second > 9) return clear_dst();
                 (num *= 100) += first * 10 + second;
             }
             if (num > 255) return clear_dst();

             /// parse IPv4 with known first octet
             if (!parse_ipv4(src, eof, iter, num)) return clear_dst();

             /// parse_ipv4 stores a host-endian value; this buffer is big-endian at this point.
             if constexpr (std::endian::native == std::endian::little)
                 std::reverse(iter, iter + IPV4_BINARY_LENGTH);

             iter += 4;
             groups += 2;
             break; /// IPv4 block is the last - end of parsing
         }

         if (!group_start) /// end of parsing
             break;
         group_start = false;

         UInt16 val = 0;  /// current decoded group
         int xdigits = 0; /// number of decoded hex digits in current group

         /// Read at most four hex digits; the loop stops when unhex() returns 0xFF.
         for (; !eof() && xdigits < 4; ++src, ++xdigits) {
             UInt8 num = unhex(*src);
             if (num == 0xFF) break;
             (val <<= 4) |= num;
         }

         if (xdigits == 0) /// end of parsing
             break;

         *iter++ = static_cast<unsigned char>((val >> 8) & 0xffu);
         *iter++ = static_cast<unsigned char>(val & 0xffu);
         ++groups;
     }

     /// either all 8 groups or all-zeroes block should be present
     if (groups < 8 && zptr == nullptr) return clear_dst();

     /// process all-zeroes block
     if (zptr != nullptr) {
         /// Move everything parsed after "::" to the end of the buffer and zero the gap.
         size_t msize = iter - zptr;
         std::memmove(dst + IPV6_BINARY_LENGTH - msize, zptr, msize);
         std::memset(zptr, '\0', IPV6_BINARY_LENGTH - (iter - dst));
     }

     /// the current function logic is processed in big endian manner
     /// but ipv6 in doris is stored in little-endian byte order
     /// so transfer to little-endian
     std::reverse(dst, dst + IPV6_BINARY_LENGTH);

     return true;
 }

 /// returns pointer to the right after parsed sequence or null on failed parsing
 inline const char* parse_ipv6(const char* src, const char* end, unsigned char* dst) {
     if (parse_ipv6(
                 src, [&src, end]() { return src == end; }, dst))
         return src;
     return nullptr;
 }

 /// Returns true if the whole range [src, end) was parsed successfully as an IPv6 address.
 inline bool parse_ipv6_whole(const char* src, const char* end, unsigned char* dst) {
     const char* const stop = parse_ipv6(src, end, dst);
     // A failed parse yields nullptr; check it explicitly so that a null `end` is never
     // mistaken for "consumed everything".
     return stop != nullptr && stop == end;
 }

 /// returns pointer to the right after parsed sequence or null on failed parsing
 inline const char* parse_ipv6(const char* src, unsigned char* dst) {
     if (parse_ipv6(
                 src, []() { return false; }, dst))
         return src;
     return nullptr;
 }

 /// Returns true if the whole null-terminated string is a valid IPv6 address.
 inline bool parse_ipv6_whole(const char* src, unsigned char* dst) {
     const char* const stop = parse_ipv6(src, dst);
     if (stop == nullptr) {
         return false;
     }
     // Success only when the parser stopped exactly at the terminator.
     return *stop == '\0';
 }

 #include "common/compile_check_end.h"
 } // namespace doris::vectorized
	// Licensed to the Apache Software Foundation (ASF) under one
	// or more contributor license agreements. See the NOTICE file
	// distributed with this work for additional information
	// regarding copyright ownership. The ASF licenses this file
	// to you under the Apache License, Version 2.0 (the
	// "License"); you may not use this file except in compliance
	// with the License. You may obtain a copy of the License at
	//
	// http://www.apache.org/licenses/LICENSE-2.0
	//
	// Unless required by applicable law or agreed to in writing,
	// software distributed under the License is distributed on an
	// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
	// KIND, either express or implied. See the License for the
	// specific language governing permissions and limitations
	// under the License.
	// This file is copied from
	// https://github.com/ClickHouse/ClickHouse/blob/master/src/Common/formatIPv6.h
	// and modified by Doris

	#pragma once

	#include <vec/common/hex.h>
	#include <vec/common/string_utils/string_utils.h>
	#include <vec/core/types.h>

	#include <algorithm>
	#include <array>
	#include <bit>
	#include <cstdint>
	#include <cstring>
	#include <utility>

	/// Size in bytes of a binary IPv4 address.
	constexpr size_t IPV4_BINARY_LENGTH = 4;
	/// Length of the longest dotted-quad text, i.e. "255.255.255.255".
	constexpr size_t IPV4_MAX_TEXT_LENGTH = 15; /// Does not count tail zero byte.
	/// Length of an IPv6 text made of eight 4-digit hex groups and seven ':' separators.
	constexpr size_t IPV6_MAX_TEXT_LENGTH = 39;
	constexpr size_t IPV4_MIN_NUM_VALUE = 0; // numeric value of '0.0.0.0'
	constexpr size_t IPV4_MAX_NUM_VALUE = 4294967295; // numeric value of '255.255.255.255'
	constexpr int IPV4_MAX_OCTET_VALUE = 255; // largest value a single octet can hold
	/// Number of bits in one IPv4 octet; used as the shift step while parsing.
	constexpr size_t IPV4_OCTET_BITS = 8;
	/// Radix used when accumulating the decimal digits of an octet.
	constexpr size_t DECIMAL_BASE = 10;
	/// Size in bytes of a binary IPv6 address.
	constexpr size_t IPV6_BINARY_LENGTH = 16;

	namespace doris::vectorized {
	#include "common/compile_check_begin.h"

	/// Lookup table indexed by a byte value (0..255). format_ipv4 copies `.second` characters
	/// starting at `.first` to print an octet. Only declared here; the definition is not part of
	/// this header.
	extern const std::array<std::pair<const char*, size_t>, 256> one_byte_to_string_lookup_table;

	/** Format a 4-byte binary sequence as IPv4 text: 'aaa.bbb.ccc.ddd'.
	 * The value is read in host byte order (the leading octet comes from src[3] on little-endian
	 * hosts and from src[0] on big-endian hosts), that is 0x7f000001 => "127.0.0.1".
	 *
	 * Any number of the tail octets can be replaced with the given mask string.
	 * No terminating zero byte is written; on return `dst` points just past the last character.
	 * Nothing is written beyond that position.
	 *
	 * @param src              - binary address.
	 * @param src_size         - number of significant bytes; leading octets that are not covered
	 *                           are printed as "0". Values above IPV4_BINARY_LENGTH are clamped.
	 * @param dst              - output cursor (reference to pointer), advanced past the text.
	 *                           Must have room for IPV4_MAX_TEXT_LENGTH characters when the mask
	 *                           string is at most three characters long.
	 * @param mask_tail_octets - how many trailing octets are replaced by `mask_string`.
	 *                           Values above IPV4_BINARY_LENGTH are clamped.
	 * @param mask_string      - replacement text; a null pointer is treated as an empty string.
	 *
	 * Examples:
	 *     format_ipv4(&0x7f000001, dst, mask_tail_octets = 0, nullptr);
	 *         > dst == "127.0.0.1"
	 *     format_ipv4(&0x7f000001, dst, mask_tail_octets = 1, "xxx");
	 *         > dst == "127.0.0.xxx"
	 *     format_ipv4(&0x7f000001, dst, mask_tail_octets = 1, "0");
	 *         > dst == "127.0.0.0"
	 */
	inline void format_ipv4(const unsigned char* src, size_t src_size, char*& dst,
	                        uint8_t mask_tail_octets = 0, const char* mask_string = "xxx") {
	    const size_t mask_length = mask_string ? strlen(mask_string) : 0;
	    // Clamp both counts so that the size_t subtractions below can never wrap around.
	    const size_t masked_octets = std::min<size_t>(mask_tail_octets, IPV4_BINARY_LENGTH);
	    const size_t limit = IPV4_BINARY_LENGTH - masked_octets;
	    const size_t first_real_octet = IPV4_BINARY_LENGTH - std::min(src_size, IPV4_BINARY_LENGTH);

	    for (size_t octet = 0; octet < IPV4_BINARY_LENGTH; ++octet) {
	        // The separator goes *before* every component except the first one, so no byte is
	        // ever written past the end of the formatted address.
	        if (octet != 0) {
	            *dst++ = '.';
	        }

	        if (octet >= limit) {
	            // Masked tail octet.
	            if (mask_length != 0) {
	                memcpy(dst, mask_string, mask_length);
	                dst += mask_length;
	            }
	            continue;
	        }

	        if (octet < first_real_octet) {
	            // Octet not covered by src_size: pad with zero.
	            *dst++ = '0';
	            continue;
	        }

	        uint8_t value = 0;
	        if constexpr (std::endian::native == std::endian::little) {
	            value = static_cast<uint8_t>(src[IPV4_BINARY_LENGTH - octet - 1]);
	        } else {
	            value = static_cast<uint8_t>(src[octet]);
	        }

	        const auto& entry = one_byte_to_string_lookup_table[value];
	        memcpy(dst, entry.first, entry.second);
	        dst += entry.second;
	    }
	}

	inline void format_ipv4(const unsigned char* src, char*& dst, uint8_t mask_tail_octets = 0,
	const char* mask_string = "xxx") {
	format_ipv4(src, 4, dst, mask_tail_octets, mask_string);
	}

	/** Unsafe (no bounds-checking for src nor dst), optimized version of parsing IPv4 string.
	 *
	 * Parses the input string `src` and stores the binary host-endian value into the buffer
	 * pointed to by `dst`. That is "127.0.0.1" becomes 0x7f000001.
	 * Exactly IPV4_BINARY_LENGTH bytes are written on success.
	 *
	 * In case of failure doesn't modify buffer pointed by `dst` (but `src` may have been advanced).
	 *
	 * Each octet may consist of up to four decimal digits as long as its value does not exceed
	 * IPV4_MAX_OCTET_VALUE.
	 *
	 * WARNING - this function is adapted to work with ReadBuffer, where src is the position reference (ReadBuffer::position())
	 *           and eof is the ReadBuffer::eof() - therefore algorithm below does not rely on buffer's continuity.
	 *           To parse strings use overloads below.
	 *
	 * @param src         - iterator (reference to pointer) over input string - warning - continuity is not guaranteed.
	 * @param eof         - function returning true if iterator reached the end - warning - can break iterator's continuity.
	 * @param dst         - where to put output bytes, expected to be non-null and at least IPV4_BINARY_LENGTH long.
	 * @param first_octet - preparsed first octet, or a negative value when there is none.
	 * @return            - true if parsed successfully, false otherwise.
	 */
	template <typename T, typename EOFfunction>
	    requires(std::is_same<typename std::remove_cv<T>::type, char>::value)
	inline bool parse_ipv4(T*& src, EOFfunction eof, unsigned char* dst, int64_t first_octet = -1) {
	    if (src == nullptr || first_octet > IPV4_MAX_OCTET_VALUE) {
	        return false;
	    }

	    int64_t result = 0;
	    // Bit position of the octet being parsed; starts at the most significant one.
	    int offset = (IPV4_BINARY_LENGTH - 1) * IPV4_OCTET_BITS;
	    if (first_octet >= 0) {
	        result |= first_octet << offset;
	        offset -= IPV4_OCTET_BITS;
	    }

	    // The loop increment (++src) skips the '.' that was verified at the end of the iteration.
	    for (; true; offset -= IPV4_OCTET_BITS, ++src) {
	        if (eof()) {
	            return false;
	        }

	        int64_t value = 0;
	        size_t len = 0;
	        while (is_numeric_ascii(*src) && len <= 3) {
	            value = value * DECIMAL_BASE + (*src - '0');
	            ++len;
	            ++src;
	            if (eof()) {
	                break;
	            }
	        }
	        // Every octet but the last one has to be followed by a '.'.
	        if (len == 0 || value > IPV4_MAX_OCTET_VALUE || (offset > 0 && (eof() || *src != '.'))) {
	            return false;
	        }
	        result |= value << offset;

	        if (offset == 0) {
	            break;
	        }
	    }

	    // The accumulator is 64 bits wide, but the destination holds only IPV4_BINARY_LENGTH bytes:
	    // narrow first so that neither extra bytes are written nor the wrong half is copied.
	    const auto packed = static_cast<uint32_t>(result);
	    static_assert(sizeof(packed) == IPV4_BINARY_LENGTH);
	    memcpy(dst, &packed, sizeof(packed));
	    return true;
	}

	/// returns pointer to the right after parsed sequence or null on failed parsing
	inline const char* parse_ipv4(const char* src, const char* end, unsigned char* dst) {
	if (parse_ipv4(
	src, [&src, end]() { return src == end; }, dst)) {
	return src;
	}
	return nullptr;
	}

	/// Returns true if the whole range [src, end) was parsed successfully as an IPv4 address.
	inline bool parse_ipv4_whole(const char* src, const char* end, unsigned char* dst) {
	    const char* const stop = parse_ipv4(src, end, dst);
	    // A failed parse yields nullptr; check it explicitly so that a null `end` is never
	    // mistaken for "consumed everything".
	    return stop != nullptr && stop == end;
	}

	/// returns pointer to the right after parsed sequence or null on failed parsing
	inline const char* parse_ipv4(const char* src, unsigned char* dst) {
	if (parse_ipv4(
	src, []() { return false; }, dst)) {
	return src;
	}
	return nullptr;
	}

	/// Returns true if the whole null-terminated string is a valid IPv4 address.
	inline bool parse_ipv4_whole(const char* src, unsigned char* dst) {
	    const char* const stop = parse_ipv4(src, dst);
	    if (stop == nullptr) {
	        return false;
	    }
	    // Success only when the parser stopped exactly at the terminator.
	    return *stop == '\0';
	}

	/// Integer logarithm: returns ceil(log(value, base)), i.e. the smallest integer greater than or
	/// equal to log(value, base). `carry` tells whether a non-zero remainder has already been
	/// dropped; callers start with false.
	inline constexpr UInt32 int_log(const UInt32 value, const UInt32 base, const bool carry) {
	    UInt32 remaining = value;
	    bool dropped_remainder = carry;
	    UInt32 steps = 0;

	    // Divide down until a single "digit" is left, remembering any remainder lost on the way.
	    while (remaining >= base) {
	        dropped_remainder = (remaining % base) || dropped_remainder;
	        remaining /= base;
	        ++steps;
	    }

	    const bool round_up = remaining % base > 1 || dropped_remainder;
	    return steps + (round_up ? 1 : 0);
	}

	/// Writes `value` in the given base at `out` and advances `out` past the digits.
	/// Digits come from hex_digit_lowercase; no terminator is written.
	/// Faster than sprintf. NOTE This is not the best way, see
	/// https://github.com/miloyip/itoa-benchmark - but it doesn't matter here.
	template <UInt32 base, typename T>
	inline void print_integer(char*& out, T value) {
	    if (value == 0) {
	        *out++ = '0';
	        return;
	    }

	    // Scratch space sized from the digits needed per byte of T.
	    constexpr size_t max_digits = sizeof(T) * int_log(256, base, false);
	    char digits[max_digits];
	    size_t count = 0;

	    // Digits are produced least significant first...
	    for (; value > 0; value /= base) {
	        digits[count++] = hex_digit_lowercase(value % base);
	    }

	    // ...and copied to the output in reverse order (the characters, not the pointer).
	    while (count > 0) {
	        *out++ = digits[--count];
	    }
	}

	/** Rewritten inet_ntop6 from http://svn.apache.org/repos/asf/apr/apr/trunk/network_io/unix/inet_pton.c
	* performs significantly faster than the reference implementation due to the absence of sprintf calls,
	* bounds checking, unnecessary string copying and length calculation.
	* @param src - pointer to IPv6 (16 bytes) stored in little-endian byte order
	* @param dst - where to put format result bytes
	* @param zeroed_tail_bytes_count - the parameter is currently not being used
	*/
	inline void format_ipv6(unsigned char* src, char*& dst, uint8_t zeroed_tail_bytes_count = 0) {
	struct {
	Int64 base, len;
	} best {-1, 0}, cur {-1, 0};
	std::array<UInt16, IPV6_BINARY_LENGTH / sizeof(UInt16)> words {};

	// the current function logic is processed in big endian manner
	// but ipv6 in doris is stored in little-endian byte order
	// so transfer to big-endian byte order first
	// compatible with parse_ipv6 function in format_ip.h
	std::reverse(src, src + IPV6_BINARY_LENGTH);

	/** Preprocess:
	* Copy the input (bytewise) array into a wordwise array.
	* Find the longest run of 0x00's in src[] for :: shorthanding. */
	for (size_t i = 0; i < (IPV6_BINARY_LENGTH - zeroed_tail_bytes_count); i += 2) {
	words[i / 2] = (uint16_t)(src[i] << 8) \| src[i + 1];
	}

	for (size_t i = 0; i < words.size(); i++) {
	if (words[i] == 0) {
	if (cur.base == -1) {
	cur.base = i;
	cur.len = 1;
	} else {
	cur.len++;
	}
	} else {
	if (cur.base != -1) {
	if (best.base == -1 \|\| cur.len > best.len) {
	best = cur;
	}
	cur.base = -1;
	}
	}
	}

	if (cur.base != -1) {
	if (best.base == -1 \|\| cur.len > best.len) {
	best = cur;
	}
	}
	if (best.base != -1 && best.len < 2) {
	best.base = -1;
	}

	/// Format the result.
	for (size_t i = 0; i < words.size(); i++) {
	/// Are we inside the best run of 0x00's?
	if (best.base != -1) {
	auto best_base = static_cast<size_t>(best.base);
	if (i >= best_base && i < (best_base + best.len)) {
	if (i == best_base) {
	*dst++ = ':';
	}
	continue;
	}
	}
	/// Are we following an initial run of 0x00s or any real hex?
	if (i != 0) {
	*dst++ = ':';
	}
	/// Is this address an encapsulated IPv4?
	if (i == 6 && best.base == 0 && (best.len == 6 \|\| (best.len == 5 && words[5] == 0xffffu))) {
	uint8_t ipv4_buffer[IPV4_BINARY_LENGTH] = {0};
	memcpy(ipv4_buffer, src + 12, IPV4_BINARY_LENGTH);
	// Due to historical reasons format_ipv4() takes ipv4 in BE format, but inside ipv6 we store it in LE-format.
	if constexpr (std::endian::native == std::endian::little) {
	std::reverse(std::begin(ipv4_buffer), std::end(ipv4_buffer));
	}
	format_ipv4(ipv4_buffer, dst,
	std::min(zeroed_tail_bytes_count, static_cast<uint8_t>(IPV4_BINARY_LENGTH)),
	"0");
	// format_ipv4 has already added a null-terminator for us.
	return;
	}
	print_integer<16>(dst, words[i]);
	}

	/// Was it a trailing run of 0x00's?
	if (best.base != -1 &&
	static_cast<size_t>(best.base) + static_cast<size_t>(best.len) == words.size()) {
	*dst++ = ':';
	}
	}

	/** Unsafe (no bounds-checking for src nor dst), optimized version of parsing IPv6 string.
	 *
	 * Parses the input string `src` and stores binary little-endian value into buffer pointed by `dst`,
	 * which should be long enough. In case of failure zeroes IPV6_BINARY_LENGTH bytes of buffer pointed by `dst`.
	 *
	 * Groups are first written to `dst` in big-endian order, an optional "::" is expanded by
	 * shifting the groups parsed after it to the end of the buffer, and finally the whole buffer
	 * is reversed. A trailing dotted IPv4 part is delegated to parse_ipv4.
	 *
	 * Parsing stops at the first character that cannot continue the address; the function does
	 * not itself require that the input has been fully consumed.
	 *
	 * WARNING - this function is adapted to work with ReadBuffer, where src is the position reference (ReadBuffer::position())
	 *           and eof is the ReadBuffer::eof() - therefore algorithm below does not rely on buffer's continuity.
	 *           To parse strings use overloads below.
	 *
	 * @param src         - iterator (reference to pointer) over input string - warning - continuity is not guaranteed.
	 * @param eof         - function returning true if iterator reached the end - warning - can break iterator's continuity.
	 * @param dst         - where to put output bytes in little-endian byte order, expected to be non-null and at IPV6_BINARY_LENGTH-long.
	 * @param first_block - preparsed first block, or a negative value when there is none
	 * @return            - true if parsed successfully, false otherwise.
	 */
	template <typename T, typename EOFfunction>
	    requires(std::is_same<typename std::remove_cv<T>::type, char>::value)
	inline bool parse_ipv6(T*& src, EOFfunction eof, unsigned char* dst, int32_t first_block = -1) {
	    /// Common failure path: wipe the output and report false.
	    const auto clear_dst = [dst]() {
	        std::memset(dst, '\0', IPV6_BINARY_LENGTH);
	        return false;
	    };

	    if (src == nullptr || eof()) return clear_dst();

	    int groups = 0;            /// number of parsed groups
	    unsigned char* iter = dst; /// iterator over dst buffer
	    unsigned char* zptr =
	            nullptr; /// pointer into dst buffer array where all-zeroes block ("::") is started

	    std::memset(dst, '\0', IPV6_BINARY_LENGTH);

	    if (first_block >= 0) {
	        /// The caller already consumed the first group: store it high byte first.
	        *iter++ = static_cast<unsigned char>((first_block >> 8) & 0xffu);
	        *iter++ = static_cast<unsigned char>(first_block & 0xffu);
	        if (*src == ':') {
	            zptr = iter;
	            ++src;
	        }
	        ++groups;
	    }

	    /// True when the next characters are expected to start a new group.
	    bool group_start = true;

	    while (!eof() && groups < 8) {
	        if (*src == ':') {
	            ++src;
	            if (eof()) /// trailing colon is not allowed
	                return clear_dst();

	            group_start = true;

	            if (*src == ':') {
	                if (zptr != nullptr) /// multiple all-zeroes blocks are not allowed
	                    return clear_dst();
	                zptr = iter;
	                ++src;
	                continue;
	            }
	            if (groups == 0) /// leading colon is not allowed
	                return clear_dst();
	        }

	        /// mixed IPv4 parsing
	        if (*src == '.') {
	            if (groups <= 1 && zptr == nullptr) /// IPv4 block can't be the first
	                return clear_dst();

	            if (group_start) /// first octet of IPv4 should be already parsed as an IPv6 group
	                return clear_dst();

	            ++src;
	            if (eof()) return clear_dst();

	            /// last parsed group should be reinterpreted as a decimal value - it's the first octet of IPv4
	            --groups;
	            iter -= 2;

	            /// Each nibble of the stored group is one decimal digit of the octet.
	            UInt16 num = 0;
	            for (int i = 0; i < 2; ++i) {
	                unsigned char first = (iter[i] >> 4) & 0x0fu;
	                unsigned char second = iter[i] & 0x0fu;
	                if (first > 9 || second > 9) return clear_dst();
	                (num *= 100) += first * 10 + second;
	            }
	            if (num > 255) return clear_dst();

	            /// parse IPv4 with known first octet
	            if (!parse_ipv4(src, eof, iter, num)) return clear_dst();

	            /// parse_ipv4 stores a host-endian value; this buffer is big-endian at this point.
	            if constexpr (std::endian::native == std::endian::little)
	                std::reverse(iter, iter + IPV4_BINARY_LENGTH);

	            iter += 4;
	            groups += 2;
	            break; /// IPv4 block is the last - end of parsing
	        }

	        if (!group_start) /// end of parsing
	            break;
	        group_start = false;

	        UInt16 val = 0;  /// current decoded group
	        int xdigits = 0; /// number of decoded hex digits in current group

	        /// Read at most four hex digits; the loop stops when unhex() returns 0xFF.
	        for (; !eof() && xdigits < 4; ++src, ++xdigits) {
	            UInt8 num = unhex(*src);
	            if (num == 0xFF) break;
	            (val <<= 4) |= num;
	        }

	        if (xdigits == 0) /// end of parsing
	            break;

	        *iter++ = static_cast<unsigned char>((val >> 8) & 0xffu);
	        *iter++ = static_cast<unsigned char>(val & 0xffu);
	        ++groups;
	    }

	    /// either all 8 groups or all-zeroes block should be present
	    if (groups < 8 && zptr == nullptr) return clear_dst();

	    /// process all-zeroes block
	    if (zptr != nullptr) {
	        /// Move everything parsed after "::" to the end of the buffer and zero the gap.
	        size_t msize = iter - zptr;
	        std::memmove(dst + IPV6_BINARY_LENGTH - msize, zptr, msize);
	        std::memset(zptr, '\0', IPV6_BINARY_LENGTH - (iter - dst));
	    }

	    /// the current function logic is processed in big endian manner
	    /// but ipv6 in doris is stored in little-endian byte order
	    /// so transfer to little-endian
	    std::reverse(dst, dst + IPV6_BINARY_LENGTH);

	    return true;
	}

	/// returns pointer to the right after parsed sequence or null on failed parsing
	inline const char* parse_ipv6(const char* src, const char* end, unsigned char* dst) {
	if (parse_ipv6(
	src, [&src, end]() { return src == end; }, dst))
	return src;
	return nullptr;
	}

	/// Returns true if the whole range [src, end) was parsed successfully as an IPv6 address.
	inline bool parse_ipv6_whole(const char* src, const char* end, unsigned char* dst) {
	    const char* const stop = parse_ipv6(src, end, dst);
	    // A failed parse yields nullptr; check it explicitly so that a null `end` is never
	    // mistaken for "consumed everything".
	    return stop != nullptr && stop == end;
	}

	/// returns pointer to the right after parsed sequence or null on failed parsing
	inline const char* parse_ipv6(const char* src, unsigned char* dst) {
	if (parse_ipv6(
	src, []() { return false; }, dst))
	return src;
	return nullptr;
	}

	/// Returns true if the whole null-terminated string is a valid IPv6 address.
	inline bool parse_ipv6_whole(const char* src, unsigned char* dst) {
	    const char* const stop = parse_ipv6(src, dst);
	    if (stop == nullptr) {
	        return false;
	    }
	    // Success only when the parser stopped exactly at the terminator.
	    return *stop == '\0';
	}

	#include "common/compile_check_end.h"
	} // namespace doris::vectorized