blob: e790a10acf1f9209a28f0a31bc43d0a724c744dd [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#include <cmath>
#include <string>
#include <vector>
#include <gtest/gtest.h>
#include "arrow/testing/gtest_util.h"
#include "arrow/type.h"
#include "arrow/util/value_parsing.h"
namespace arrow {
namespace internal {
template <typename T>
void AssertConversion(const T& type, const std::string& s, typename T::c_type expected) {
typename T::c_type out{};
ASSERT_TRUE(ParseValue(type, s.data(), s.length(), &out))
<< "Conversion failed for '" << s << "' (expected to return " << expected << ")";
ASSERT_EQ(out, expected) << "Conversion failed for '" << s << "'";
}
template <typename T>
void AssertConversion(const std::string& s, typename T::c_type expected) {
auto type = checked_pointer_cast<T>(TypeTraits<T>::type_singleton());
AssertConversion(*type, s, expected);
}
template <typename T>
void AssertConversionFails(const T& type, const std::string& s) {
typename T::c_type out{};
ASSERT_FALSE(ParseValue(type, s.data(), s.length(), &out))
<< "Conversion should have failed for '" << s << "' (returned " << out << ")";
}
template <typename T>
void AssertConversionFails(const std::string& s) {
auto type = checked_pointer_cast<T>(TypeTraits<T>::type_singleton());
AssertConversionFails(*type, s);
}
TEST(StringConversion, ToBoolean) {
  // Spellings that must parse as true (mixed case included)
  for (const char* truthy : {"true", "tRuE", "1"}) {
    AssertConversion<BooleanType>(truthy, true);
  }
  // Spellings that must parse as false (mixed case included)
  for (const char* falsy : {"FAlse", "false", "0"}) {
    AssertConversion<BooleanType>(falsy, false);
  }
  // The empty string is not a boolean
  AssertConversionFails<BooleanType>("");
}
TEST(StringConversion, ToFloat) {
  AssertConversion<FloatType>("1.5", 1.5f);
  AssertConversion<FloatType>("0", 0.0f);
  // ASSERT_EQ doesn't distinguish signed zeros, so this only checks the
  // magnitude; the sign is verified separately at the end of the test.
  AssertConversion<FloatType>("-0.0", -0.0f);
  AssertConversion<FloatType>("-1e20", -1e20f);
  AssertConversionFails<FloatType>("");
  AssertConversionFails<FloatType>("e");

  // Signed zero: parse directly and inspect the sign bit. Kept last so that a
  // fatal failure here cannot skip any of the checks above.
  const std::string negative_zero = "-0.0";
  const auto type = checked_pointer_cast<FloatType>(TypeTraits<FloatType>::type_singleton());
  FloatType::c_type parsed = 1.0f;
  ASSERT_TRUE(ParseValue(*type, negative_zero.data(), negative_zero.length(), &parsed))
      << "Conversion failed for '" << negative_zero << "'";
  EXPECT_TRUE(std::signbit(parsed))
      << "Sign of zero was lost when parsing '" << negative_zero << "'";
}
TEST(StringConversion, ToDouble) {
  AssertConversion<DoubleType>("1.5", 1.5);
  AssertConversion<DoubleType>("0", 0);
  // ASSERT_EQ doesn't distinguish signed zeros, so this only checks the
  // magnitude; the sign is verified separately at the end of the test.
  AssertConversion<DoubleType>("-0.0", -0.0);
  AssertConversion<DoubleType>("-1e100", -1e100);
  AssertConversionFails<DoubleType>("");
  AssertConversionFails<DoubleType>("e");

  // Signed zero: parse directly and inspect the sign bit. Kept last so that a
  // fatal failure here cannot skip any of the checks above.
  const std::string negative_zero = "-0.0";
  const auto type =
      checked_pointer_cast<DoubleType>(TypeTraits<DoubleType>::type_singleton());
  DoubleType::c_type parsed = 1.0;
  ASSERT_TRUE(ParseValue(*type, negative_zero.data(), negative_zero.length(), &parsed))
      << "Conversion failed for '" << negative_zero << "'";
  EXPECT_TRUE(std::signbit(parsed))
      << "Sign of zero was lost when parsing '" << negative_zero << "'";
}
// Parsing must not follow the process locale: "1.5" has to parse as 1.5 even
// while a locale that writes decimals with a comma is active.
// NOTE(review): these tests are compiled out for Windows builds without
// NDEBUG; the reason is not visible in this file -- confirm before changing.
#if !defined(_WIN32) || defined(NDEBUG)
TEST(StringConversion, ToFloatLocale) {
  // French locale uses the comma as decimal point
  LocaleGuard locale_guard("fr_FR.UTF-8");
  AssertConversion<FloatType>("1.5", 1.5f);
}
TEST(StringConversion, ToDoubleLocale) {
  // French locale uses the comma as decimal point
  LocaleGuard locale_guard("fr_FR.UTF-8");
  // Double literal, matching DoubleType's c_type (was the float literal 1.5f)
  AssertConversion<DoubleType>("1.5", 1.5);
}
#endif  // !defined(_WIN32) || defined(NDEBUG)
TEST(StringConversion, ToInt8) {
  AssertConversion<Int8Type>("0", 0);
  // Range limits, with and without redundant leading zeros
  for (const char* max_text : {"127", "0127"}) {
    AssertConversion<Int8Type>(max_text, 127);
  }
  for (const char* min_text : {"-128", "-00128"}) {
    AssertConversion<Int8Type>(min_text, -128);
  }
  // One past either limit, then inputs that are not integers at all
  for (const char* rejected : {"128", "-129", "", "-", "0.0", "e"}) {
    AssertConversionFails<Int8Type>(rejected);
  }
}
TEST(StringConversion, ToUInt8) {
  AssertConversion<UInt8Type>("0", 0);
  AssertConversion<UInt8Type>("26", 26);
  // Upper limit, with and without a redundant leading zero
  for (const char* max_text : {"255", "0255"}) {
    AssertConversion<UInt8Type>(max_text, 255);
  }
  // Negative or too-large values, then inputs that are not integers at all
  for (const char* rejected : {"-1", "256", "260", "1234", "", "-", "0.0", "e"}) {
    AssertConversionFails<UInt8Type>(rejected);
  }
}
TEST(StringConversion, ToInt16) {
  AssertConversion<Int16Type>("0", 0);
  // Range limits, with and without redundant leading zeros
  for (const char* max_text : {"32767", "032767"}) {
    AssertConversion<Int16Type>(max_text, 32767);
  }
  for (const char* min_text : {"-32768", "-0032768"}) {
    AssertConversion<Int16Type>(min_text, -32768);
  }
  // One past either limit, then inputs that are not integers at all
  for (const char* rejected : {"32768", "-32769", "", "-", "0.0", "e"}) {
    AssertConversionFails<Int16Type>(rejected);
  }
}
TEST(StringConversion, ToUInt16) {
  AssertConversion<UInt16Type>("0", 0);
  AssertConversion<UInt16Type>("6660", 6660);
  // Upper limit, with and without a redundant leading zero
  for (const char* max_text : {"65535", "065535"}) {
    AssertConversion<UInt16Type>(max_text, 65535);
  }
  // Negative or too-large values, then inputs that are not integers at all
  for (const char* rejected : {"-1", "65536", "123456", "", "-", "0.0", "e"}) {
    AssertConversionFails<UInt16Type>(rejected);
  }
}
TEST(StringConversion, ToInt32) {
  AssertConversion<Int32Type>("0", 0);
  // Range limits, with and without redundant leading zeros
  for (const char* max_text : {"2147483647", "02147483647"}) {
    AssertConversion<Int32Type>(max_text, 2147483647);
  }
  for (const char* min_text : {"-2147483648", "-002147483648"}) {
    AssertConversion<Int32Type>(min_text, -2147483648LL);
  }
  // One past either limit, then inputs that are not integers at all
  for (const char* rejected : {"2147483648", "-2147483649", "", "-", "0.0", "e"}) {
    AssertConversionFails<Int32Type>(rejected);
  }
}
TEST(StringConversion, ToUInt32) {
  AssertConversion<UInt32Type>("0", 0);
  AssertConversion<UInt32Type>("432198765", 432198765UL);
  // Upper limit, with and without a redundant leading zero
  for (const char* max_text : {"4294967295", "04294967295"}) {
    AssertConversion<UInt32Type>(max_text, 4294967295UL);
  }
  // Negative or too-large values, then inputs that are not integers at all
  for (const char* rejected :
       {"-1", "4294967296", "12345678901", "", "-", "0.0", "e"}) {
    AssertConversionFails<UInt32Type>(rejected);
  }
}
TEST(StringConversion, ToInt64) {
  AssertConversion<Int64Type>("0", 0);
  // Range limits, with and without redundant leading zeros
  for (const char* max_text : {"9223372036854775807", "09223372036854775807"}) {
    AssertConversion<Int64Type>(max_text, 9223372036854775807LL);
  }
  // The minimum is spelled as (-max - 1) because the positive magnitude does
  // not fit in a signed 64-bit literal
  for (const char* min_text : {"-9223372036854775808", "-009223372036854775808"}) {
    AssertConversion<Int64Type>(min_text, -9223372036854775807LL - 1);
  }
  // One past either limit, then inputs that are not integers at all
  for (const char* rejected :
       {"9223372036854775808", "-9223372036854775809", "", "-", "0.0", "e"}) {
    AssertConversionFails<Int64Type>(rejected);
  }
}
TEST(StringConversion, ToUInt64) {
  AssertConversion<UInt64Type>("0", 0);
  AssertConversion<UInt64Type>("18446744073709551615", 18446744073709551615ULL);
  // Leading zeros must not affect the result; this mirrors the leading-zero
  // case present in every other integer width's test.
  AssertConversion<UInt64Type>("018446744073709551615", 18446744073709551615ULL);
  // Non-representable values
  AssertConversionFails<UInt64Type>("-1");
  AssertConversionFails<UInt64Type>("18446744073709551616");
  // Inputs that are not integers at all
  AssertConversionFails<UInt64Type>("");
  AssertConversionFails<UInt64Type>("-");
  AssertConversionFails<UInt64Type>("0.0");
  AssertConversionFails<UInt64Type>("e");
}
TEST(StringConversion, ToDate32) {
  // Expected values are day counts relative to 1970-01-01
  struct DayCase {
    const char* text;
    int days;
  };
  const DayCase accepted[] = {
      {"1970-01-01", 0},     {"1970-01-02", 1},      {"2020-03-15", 18336},
      {"1945-05-08", -9004}, {"4707-11-28", 999999}, {"0001-01-01", -719162},
  };
  for (const DayCase& sample : accepted) {
    AssertConversion<Date32Type>(sample.text, sample.days);
  }
  // Anything other than a complete dash-separated date is rejected
  for (const char* malformed :
       {"", "1970", "1970-01", "1970-01-01 00:00:00", "1970/01/01"}) {
    AssertConversionFails<Date32Type>(malformed);
  }
}
TEST(StringConversion, ToDate64) {
  // Every expected value is a whole number of days from 1970-01-01, expressed
  // in units of 86400000 per day
  constexpr int64_t kUnitsPerDay = 86400000LL;
  struct DayCase {
    const char* text;
    int64_t days;
  };
  const DayCase accepted[] = {
      {"1970-01-01", 0},     {"1970-01-02", 1},      {"2020-03-15", 18336},
      {"1945-05-08", -9004}, {"4707-11-28", 999999}, {"0001-01-01", -719162},
  };
  for (const DayCase& sample : accepted) {
    AssertConversion<Date64Type>(sample.text, sample.days * kUnitsPerDay);
  }
}
TEST(StringConversion, ToTimestampDate_ISO8601) {
  // Date-only strings, checked once per time unit; 1970-01-01 maps to 0 in
  // every unit.

  // Seconds: also covers malformed and impossible dates
  const TimestampType in_seconds{TimeUnit::SECOND};
  AssertConversion(in_seconds, "1970-01-01", 0);
  AssertConversion(in_seconds, "1989-07-14", 616377600);
  AssertConversion(in_seconds, "2000-02-29", 951782400);
  AssertConversion(in_seconds, "3989-07-14", 63730281600LL);
  AssertConversion(in_seconds, "1900-02-28", -2203977600LL);
  for (const char* malformed :
       {"", "1970", "19700101", "1970/01/01", "1970-01-01 ", "1970-01-01Z"}) {
    AssertConversionFails(in_seconds, malformed);
  }
  // Well-formed but not real calendar dates
  for (const char* impossible :
       {"1970-00-01", "1970-13-01", "1970-01-32", "1970-02-29", "2100-02-29"}) {
    AssertConversionFails(in_seconds, impossible);
  }

  // Milliseconds
  const TimestampType in_millis{TimeUnit::MILLI};
  AssertConversion(in_millis, "1970-01-01", 0);
  AssertConversion(in_millis, "1989-07-14", 616377600000LL);
  AssertConversion(in_millis, "3989-07-14", 63730281600000LL);
  AssertConversion(in_millis, "1900-02-28", -2203977600000LL);

  // Microseconds
  const TimestampType in_micros{TimeUnit::MICRO};
  AssertConversion(in_micros, "1970-01-01", 0);
  AssertConversion(in_micros, "1989-07-14", 616377600000000LL);
  AssertConversion(in_micros, "3989-07-14", 63730281600000000LL);
  AssertConversion(in_micros, "1900-02-28", -2203977600000000LL);

  // Nanoseconds
  const TimestampType in_nanos{TimeUnit::NANO};
  AssertConversion(in_nanos, "1970-01-01", 0);
  AssertConversion(in_nanos, "1989-07-14", 616377600000000000LL);
  AssertConversion(in_nanos, "2018-11-13", 1542067200000000000LL);
  AssertConversion(in_nanos, "1900-02-28", -2203977600000000000LL);
}
TEST(StringConversion, ToTimestampDateTime_ISO8601) {
  // Date-and-time strings, checked once per time unit. Each unit accepts at
  // most as many fractional-second digits as it can represent.
  {
    TimestampType type{TimeUnit::SECOND};
    AssertConversion(type, "1970-01-01 00:00:00", 0);
    // Hour-only, hour:minute and full times; ' ' or 'T' separator; optional 'Z'
    AssertConversion(type, "2018-11-13 17", 1542128400);
    AssertConversion(type, "2018-11-13T17", 1542128400);
    AssertConversion(type, "2018-11-13 17Z", 1542128400);
    AssertConversion(type, "2018-11-13T17Z", 1542128400);
    AssertConversion(type, "2018-11-13 17:11", 1542129060);
    AssertConversion(type, "2018-11-13T17:11", 1542129060);
    AssertConversion(type, "2018-11-13 17:11Z", 1542129060);
    AssertConversion(type, "2018-11-13T17:11Z", 1542129060);
    AssertConversion(type, "2018-11-13 17:11:10", 1542129070);
    AssertConversion(type, "2018-11-13T17:11:10", 1542129070);
    AssertConversion(type, "2018-11-13 17:11:10Z", 1542129070);
    AssertConversion(type, "2018-11-13T17:11:10Z", 1542129070);
    AssertConversion(type, "1900-02-28 12:34:56", -2203932304LL);
    // No subseconds allowed
    AssertConversionFails(type, "1900-02-28 12:34:56.001");
    // Invalid dates
    AssertConversionFails(type, "1970-02-29 00:00:00");
    AssertConversionFails(type, "2100-02-29 00:00:00");
    // Invalid times
    AssertConversionFails(type, "1970-01-01 24");
    AssertConversionFails(type, "1970-01-01 00:60");
    AssertConversionFails(type, "1970-01-01 00,00");
    AssertConversionFails(type, "1970-01-01 24:00:00");
    AssertConversionFails(type, "1970-01-01 00:60:00");
    AssertConversionFails(type, "1970-01-01 00:00:60");
    AssertConversionFails(type, "1970-01-01 00:00,00");
    AssertConversionFails(type, "1970-01-01 00,00:00");
  }
  {
    TimestampType type{TimeUnit::MILLI};
    AssertConversion(type, "2018-11-13 17:11:10", 1542129070000LL);
    AssertConversion(type, "2018-11-13T17:11:10Z", 1542129070000LL);
    AssertConversion(type, "3989-07-14T11:22:33Z", 63730322553000LL);
    AssertConversion(type, "1900-02-28 12:34:56", -2203932304000LL);
    // Up to 3 fractional digits
    AssertConversion(type, "2018-11-13T17:11:10.777Z", 1542129070777LL);
    AssertConversion(type, "1900-02-28 12:34:56.1", -2203932304000LL + 100LL);
    AssertConversion(type, "1900-02-28 12:34:56.12", -2203932304000LL + 120LL);
    AssertConversion(type, "1900-02-28 12:34:56.123", -2203932304000LL + 123LL);
    // Invalid subseconds
    AssertConversionFails(type, "1900-02-28 12:34:56.1234");
    AssertConversionFails(type, "1900-02-28 12:34:56.12345");
    AssertConversionFails(type, "1900-02-28 12:34:56.123456");
    AssertConversionFails(type, "1900-02-28 12:34:56.1234567");
    AssertConversionFails(type, "1900-02-28 12:34:56.12345678");
    AssertConversionFails(type, "1900-02-28 12:34:56.123456789");
  }
  {
    TimestampType type{TimeUnit::MICRO};
    AssertConversion(type, "2018-11-13 17:11:10", 1542129070000000LL);
    AssertConversion(type, "2018-11-13T17:11:10Z", 1542129070000000LL);
    AssertConversion(type, "3989-07-14T11:22:33Z", 63730322553000000LL);
    AssertConversion(type, "1900-02-28 12:34:56", -2203932304000000LL);
    // Up to 6 fractional digits
    AssertConversion(type, "2018-11-13T17:11:10.777000", 1542129070777000LL);
    AssertConversion(type, "3989-07-14T11:22:33.000777Z", 63730322553000777LL);
    AssertConversion(type, "1900-02-28 12:34:56.1", -2203932304000000LL + 100000LL);
    AssertConversion(type, "1900-02-28 12:34:56.12", -2203932304000000LL + 120000LL);
    AssertConversion(type, "1900-02-28 12:34:56.123", -2203932304000000LL + 123000LL);
    AssertConversion(type, "1900-02-28 12:34:56.1234", -2203932304000000LL + 123400LL);
    AssertConversion(type, "1900-02-28 12:34:56.12345", -2203932304000000LL + 123450LL);
    AssertConversion(type, "1900-02-28 12:34:56.123456", -2203932304000000LL + 123456LL);
    // Invalid subseconds
    AssertConversionFails(type, "1900-02-28 12:34:56.1234567");
    AssertConversionFails(type, "1900-02-28 12:34:56.12345678");
    AssertConversionFails(type, "1900-02-28 12:34:56.123456789");
  }
  {
    TimestampType type{TimeUnit::NANO};
    AssertConversion(type, "2018-11-13 17:11:10", 1542129070000000000LL);
    AssertConversion(type, "2018-11-13T17:11:10Z", 1542129070000000000LL);
    AssertConversion(type, "1900-02-28 12:34:56", -2203932304000000000LL);
    // Up to 9 fractional digits
    AssertConversion(type, "2018-11-13 17:11:10.777000000", 1542129070777000000LL);
    AssertConversion(type, "2018-11-13T17:11:10.000777000Z", 1542129070000777000LL);
    AssertConversion(type, "1969-12-31 23:59:59.999999999", -1);
    AssertConversion(type, "1900-02-28 12:34:56.1", -2203932304000000000LL + 100000000LL);
    AssertConversion(type, "1900-02-28 12:34:56.12",
                     -2203932304000000000LL + 120000000LL);
    AssertConversion(type, "1900-02-28 12:34:56.123",
                     -2203932304000000000LL + 123000000LL);
    AssertConversion(type, "1900-02-28 12:34:56.1234",
                     -2203932304000000000LL + 123400000LL);
    AssertConversion(type, "1900-02-28 12:34:56.12345",
                     -2203932304000000000LL + 123450000LL);
    AssertConversion(type, "1900-02-28 12:34:56.123456",
                     -2203932304000000000LL + 123456000LL);
    AssertConversion(type, "1900-02-28 12:34:56.1234567",
                     -2203932304000000000LL + 123456700LL);
    AssertConversion(type, "1900-02-28 12:34:56.12345678",
                     -2203932304000000000LL + 123456780LL);
    AssertConversion(type, "1900-02-28 12:34:56.123456789",
                     -2203932304000000000LL + 123456789LL);
    // Invalid subseconds: one digit more than nanoseconds can represent,
    // parallel to the over-long cases checked for MILLI and MICRO above
    AssertConversionFails(type, "1900-02-28 12:34:56.1234567890");
  }
}
TEST(TimestampParser, StrptimeParser) {
  // A strptime-based parser must agree with the ISO 8601 parser on the same
  // instant, in every time unit, and must reject input that does not match
  // its format.
  const std::string format = "%m/%d/%Y %H:%M:%S";
  auto parser = TimestampParser::MakeStrptime(format);
  // Report a test failure instead of dereferencing a null parser below
  ASSERT_TRUE(parser != nullptr) << "No parser was created for format '" << format << "'";
  struct Case {
    std::string value;    // input written in `format`
    std::string iso8601;  // the same instant written as ISO 8601
  };
  const std::vector<Case> cases = {{"5/31/2000 12:34:56", "2000-05-31 12:34:56"},
                                   {"5/31/2000 00:00:00", "2000-05-31 00:00:00"}};
  const std::vector<TimeUnit::type> units = {TimeUnit::SECOND, TimeUnit::MILLI,
                                             TimeUnit::MICRO, TimeUnit::NANO};
  for (const auto unit : units) {
    for (const auto& case_ : cases) {
      // Initialized so that a failure never reports indeterminate values
      int64_t converted = 0;
      int64_t expected = 0;
      ASSERT_TRUE((*parser)(case_.value.c_str(), case_.value.size(), unit, &converted))
          << "strptime parsing failed for '" << case_.value << "'";
      ASSERT_TRUE(ParseTimestampISO8601(case_.iso8601.c_str(), case_.iso8601.size(), unit,
                                        &expected))
          << "ISO 8601 parsing failed for '" << case_.iso8601 << "'";
      ASSERT_EQ(expected, converted)
          << "Parsers disagree for '" << case_.value << "' vs '" << case_.iso8601 << "'";
    }
  }
  // Unparseable strings
  const std::vector<std::string> unparseables = {"foo", "5/1/2000", "5/1/2000 12:34:56:6"};
  for (const auto& value : unparseables) {
    int64_t dummy = 0;
    ASSERT_FALSE((*parser)(value.c_str(), value.size(), TimeUnit::SECOND, &dummy))
        << "Parsing should have failed for '" << value << "'";
  }
}
} // namespace internal
} // namespace arrow