blob: a5f272334e5453e27de001b4e7b69f6bd7656f88 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#include <Formats/FormatSettings.h>
#include <IO/ReadBuffer.h>
#include <IO/ReadHelpers.h>
#include <Common/LocalDate.h>
#include <Common/StringUtils.h>
#include "ExcelNumberReader.h"
namespace local_engine
{
/// Tells whether (year, month_, day_) names a real calendar date.
/// February has 29 days in a leap year (divisible by 4 but not by 100, or divisible by 400) and 28 otherwise.
/// Any month outside 1..12 or day outside the month's range yields false.
inline bool checkDate(const UInt16 & year, const UInt8 & month_, const UInt8 & day_)
{
    UInt8 last_day = 0;
    switch (month_)
    {
        case 1:
        case 3:
        case 5:
        case 7:
        case 8:
        case 10:
        case 12:
            last_day = 31;
            break;
        case 4:
        case 6:
        case 9:
        case 11:
            last_day = 30;
            break;
        case 2:
        {
            const bool leap = (year % 4 == 0 && year % 100 != 0) || (year % 400 == 0);
            last_day = leap ? 29 : 28;
            break;
        }
        default:
            /// Not a month number at all.
            return false;
    }
    return day_ >= 1 && day_ <= last_day;
}
/// Consumes leading ASCII digits from `in` and writes their numeric values (0..9, not the characters)
/// into `res`. Stops at end of input, at the first non-digit, or once `max_chars` digits are stored.
/// Returns how many digits were stored; `in` is left positioned right after the last consumed digit.
inline size_t readDigits(char * res, size_t max_chars, DB::ReadBuffer & in)
{
    size_t stored = 0;
    for (; !in.eof() && isNumericASCII(*in.position()) && stored < max_chars; ++in.position())
    {
        res[stored] = *in.position() - '0';
        ++stored;
    }
    return stored;
}
/// Compile-time unrolled accumulation of a decimal number from digit values.
/// Adds src[digit] * power_of_ten to `res`, then walks towards src[0], multiplying the weight by 10
/// at each step. `src` holds digit values (0..9), and `res` is added to, never reset.
template <size_t digit, size_t power_of_ten, typename T>
inline void readDecimalNumberImpl(T & res, const char * src)
{
    if constexpr (digit == 0)
    {
        /// Most significant digit: end of the recursion.
        res += src[0] * power_of_ten;
    }
    else
    {
        res += src[digit] * power_of_ten;
        readDecimalNumberImpl<digit - 1, power_of_ten * 10>(res, src);
    }
}
/// Adds to `res` the value of the `num_digits`-digit decimal number stored in `src`
/// (most significant digit first, each element a digit value 0..9).
/// `res` is accumulated into, so the caller decides its starting value.
template <size_t num_digits, typename T>
inline void readDecimalNumber(T & res, const char * src)
{
    /// Start from the least significant digit with weight 1.
    constexpr size_t last_digit = num_digits - 1;
    readDecimalNumberImpl<last_digit, 1>(res, src);
}
/// Reads a run of digits from `buf` and, only if the run is exactly `num_digits` long,
/// adds its decimal value to `res` and returns true.
/// On a length mismatch returns false and leaves `res` untouched; the digits that were read
/// (at most digits10 of UInt64) have already been consumed from `buf` in either case.
template <size_t num_digits, typename T>
inline bool readNumber(DB::ReadBuffer & buf, T & res)
{
    constexpr size_t capacity = std::numeric_limits<UInt64>::digits10;
    char scratch[capacity];
    const size_t consumed = readDigits(scratch, capacity, buf);
    if (consumed == num_digits)
    {
        readDecimalNumber<num_digits>(res, scratch);
        return true;
    }
    return false;
}
/// Declaration only; the definition is not visible in this header.
/// NOTE(review): judging by the name and signature, this presumably parses a DateTime64 with `scale`
/// fractional digits from `buf` using the CSV settings, and reports failure through the bool result.
/// Confirm against the implementation, including how `quote` is interpreted.
bool readDatetime64TextWithExcel(
DB::DateTime64 & datetime64,
UInt32 scale,
DB::ReadBuffer & buf,
const DateLUTImpl & time_zone,
const DB::FormatSettings::CSV & settings,
bool quote);
/// Declaration only; the definition is not visible in this header.
/// NOTE(review): presumably reads a DateTime64 into `x` from `buf`; the roles of `time_zone` versus
/// `utc_time_zone` and the meaning of the bool result cannot be determined from here — confirm
/// against the implementation.
bool readDateTime64Text(
DB::DateTime64 & x,
DB::ReadBuffer & buf,
const DB::FormatSettings & settings,
const DateLUTImpl & time_zone,
const DateLUTImpl & utc_time_zone,
bool quote);
/// Declaration only; the definition is not visible in this header.
/// NOTE(review): presumably parses a date into `date`, with `is_us_style` selecting a US-style field
/// order — confirm against the implementation.
bool readDateTextWithExcel(LocalDate & date, DB::ReadBuffer & buf, bool is_us_style, const DB::FormatSettings & settings);
/// Declaration only; the definition is not visible in this header.
/// Called by the LocalDate overload of readExcelText in this file, which forwards its bool result.
/// NOTE(review): presumably false means the text could not be parsed as a date — confirm.
bool readDateText(LocalDate & date, DB::ReadBuffer & buf, const DB::FormatSettings & settings);
/// Reads an integer-like value into `x`.
/// For `bool` the value is read with readBoolText and the call always reports success
/// (`has_quote` and `settings` are not used on that path).
/// For every other type the work and the result come from readExcelIntTextImpl.
template <typename T>
inline bool readExcelIntegerText(T & x, DB::ReadBuffer & buf, bool has_quote, const DB::FormatSettings & settings)
{
    if constexpr (!std::is_same_v<decltype(x), bool &>)
    {
        return readExcelIntTextImpl(x, buf, has_quote, settings);
    }
    else
    {
        readBoolText(x, buf);
        return true;
    }
}
/// Floating-point overload: forwards to readExcelFloatTextFastImpl and returns its result.
inline bool readExcelText(is_floating_point auto & x, DB::ReadBuffer & buf, bool has_quote, const DB::FormatSettings & settings)
{
    const bool parsed = readExcelFloatTextFastImpl(x, buf, has_quote, settings);
    return parsed;
}
/// Integer overload: forwards to readExcelIntegerText and returns its result.
inline bool readExcelText(is_integer auto & x, DB::ReadBuffer & buf, bool has_quote, const DB::FormatSettings & settings)
{
    const bool parsed = readExcelIntegerText(x, buf, has_quote, settings);
    return parsed;
}
/// Date overload: forwards to readDateText and returns its result.
/// The quote flag is accepted for signature parity with the numeric overloads but is not used.
inline bool readExcelText(LocalDate & x, DB::ReadBuffer & buf, bool /*has_quote*/, const DB::FormatSettings & settings)
{
    const bool parsed = readDateText(x, buf, settings);
    return parsed;
}
/// CSV, for numbers, dates: quotes are optional, no special escaping rules.
template <typename T>
bool readCSVSimple(T & x, DB::ReadBuffer & buf, const DB::FormatSettings & settings)
{
if (buf.eof())
DB::throwReadAfterEOF();
char maybe_quote = *buf.position();
bool has_quote = false;
if ((settings.csv.allow_single_quotes && maybe_quote == '\'') || (settings.csv.allow_double_quotes && maybe_quote == '\"'))
{
has_quote = true;
++buf.position();
}
/// deal empty string ""
if ((has_quote && !buf.eof() && *buf.position() == maybe_quote)
|| (!has_quote && !buf.eof() && (*buf.position() == settings.csv.delimiter || *buf.position() == '\n' || *buf.position() == '\r')))
return false;
bool result = readExcelText(x, buf, has_quote, settings);
if (!result)
return false;
if (has_quote)
assertChar(maybe_quote, buf);
while (!buf.eof() && *buf.position() == ' ')
{
//ignore end whitespace
++buf.position();
}
if (!buf.eof() && (*buf.position() != settings.csv.delimiter && *buf.position() != '\n' && *buf.position() != '\r'))
return false;
return true;
}
/// CSV reader for arithmetic types: forwards to readCSVSimple and returns its result.
template <typename T>
requires is_arithmetic_v<T>
inline bool readCSV(T & x, DB::ReadBuffer & buf, const DB::FormatSettings & settings)
{
    const bool ok = readCSVSimple(x, buf, settings);
    return ok;
}
/// CSV reader for dates: forwards to readCSVSimple and returns its result.
inline bool readCSV(LocalDate & x, DB::ReadBuffer & buf, const DB::FormatSettings & settings)
{
    const bool ok = readCSVSimple(x, buf, settings);
    return ok;
}
}