blob: c05f934376c2d3cbd53d3a1ad0954f331afe88ed [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#include <gtest/gtest.h>
#include <cstdint>
#include <cstring>
#include <iostream>
#include <string>
#include <vector>
#include "parquet/schema.h"
#include "parquet/types.h"
#include "parquet/util/comparison.h"
namespace parquet {
namespace test {
using parquet::schema::NodePtr;
using parquet::schema::PrimitiveNode;
// Builds a ByteArray describing the contents of `s`: the length is s.size()
// narrowed to uint32_t and the data pointer is s.data() viewed as raw bytes.
// NOTE(review): the pointer handed over is s's own buffer, so presumably `s`
// must outlive the returned value -- confirm against ByteArray's definition.
static ByteArray ByteArrayFromString(const std::string& s) {
  const auto length = static_cast<uint32_t>(s.size());
  const uint8_t* bytes = reinterpret_cast<const uint8_t*>(s.data());
  return ByteArray(length, bytes);
}
// Builds an FLBA whose data pointer is the start of `s`'s character buffer.
// No length is passed along here; callers in this file supply the fixed length
// to the comparator separately.
// NOTE(review): presumably `s` must hold at least that many bytes and outlive
// the returned value -- confirm against FLBA's definition.
static FLBA FLBAFromString(const std::string& s) {
  return FLBA(reinterpret_cast<const uint8_t*>(s.data()));
}
// Checks CompareDefaultByteArray on a shared-prefix pair and on a pair that
// contains a multi-byte UTF-8 character.
TEST(Comparison, signedByteArray) {
  NodePtr node =
      PrimitiveNode::Make("SignedByteArray", Repetition::REQUIRED, Type::BYTE_ARRAY);
  ColumnDescriptor descr(node, 0, 0);
  CompareDefaultByteArray less;

  // A value is expected to sort before a longer value that it is a prefix of.
  const std::string prefix = "12345";
  const std::string extended = "12345678";
  ByteArray prefix_ba = ByteArrayFromString(prefix);
  ByteArray extended_ba = ByteArrayFromString(extended);
  ASSERT_TRUE(less(prefix_ba, extended_ba));

  // Signed comparison yields the wrong order for UTF-8 text (PARQUET-686).
  // This pair deliberately pins down only the signed behaviour, not a correct
  // UTF-8 ordering.
  const std::string with_umlaut = u8"bügeln";
  const std::string plain_ascii = u8"braten";
  ByteArray with_umlaut_ba = ByteArrayFromString(with_umlaut);
  ByteArray plain_ascii_ba = ByteArrayFromString(plain_ascii);
  ASSERT_TRUE(less(with_umlaut_ba, plain_ascii_ba));
}
// Checks that CompareUnsignedByteArray orders UTF-8 strings as expected for a
// shared-prefix pair and for two pairs containing multi-byte characters.
TEST(Comparison, UnsignedByteArray) {
  NodePtr node = PrimitiveNode::Make("UnsignedByteArray", Repetition::REQUIRED,
                                     Type::BYTE_ARRAY, LogicalType::UTF8);
  ColumnDescriptor descr(node, 0, 0);
  CompareUnsignedByteArray uless;

  // Plain ASCII: a value is expected to sort before a longer value it prefixes.
  const std::string stem = "arrange";
  const std::string stem_with_suffix = "arrangement";
  ByteArray stem_ba = ByteArrayFromString(stem);
  ByteArray stem_with_suffix_ba = ByteArrayFromString(stem_with_suffix);
  ASSERT_TRUE(uless(stem_ba, stem_with_suffix_ba));

  // ASCII second character versus a multi-byte UTF-8 second character.
  const std::string ascii_word = u8"braten";
  const std::string umlaut_word = u8"bügeln";
  ByteArray ascii_word_ba = ByteArrayFromString(ascii_word);
  ByteArray umlaut_word_ba = ByteArrayFromString(umlaut_word);
  ASSERT_TRUE(uless(ascii_word_ba, umlaut_word_ba));

  // Two multi-byte leading characters with adjacent code points.
  const std::string lower_code_point = u8"ünk123456";   // ü = 252
  const std::string higher_code_point = u8"ănk123456";  // ă = 259
  ByteArray lower_code_point_ba = ByteArrayFromString(lower_code_point);
  ByteArray higher_code_point_ba = ByteArrayFromString(higher_code_point);
  ASSERT_TRUE(uless(lower_code_point_ba, higher_code_point_ba));
}
// Checks CompareDefaultFLBA, constructed with the column's type length, on an
// ASCII pair and on a pair where one value contains a non-ASCII character.
// NOTE(review): some inputs are longer than the declared length of 10
// ("Bunkd123456" is 11 characters); presumably only the first type_length
// bytes are compared -- confirm in the comparator.
TEST(Comparison, SignedFLBA) {
  const int size = 10;
  NodePtr node = PrimitiveNode::Make("SignedFLBA", Repetition::REQUIRED,
                                     Type::FIXED_LEN_BYTE_ARRAY, LogicalType::NONE, size);
  ColumnDescriptor descr(node, 0, 0);
  CompareDefaultFLBA less(descr.type_length());

  // The values already differ in their first byte ('A' versus 'B').
  const std::string leading_a = "Anti123456";
  const std::string leading_b = "Bunkd123456";
  FLBA leading_a_flba = FLBAFromString(leading_a);
  FLBA leading_b_flba = FLBAFromString(leading_b);
  ASSERT_TRUE(less(leading_a_flba, leading_b_flba));

  // Under the signed comparator the non-ASCII value is expected to sort first.
  const std::string with_umlaut = "Bünk123456";
  const std::string plain_ascii = "Bunk123456";
  FLBA with_umlaut_flba = FLBAFromString(with_umlaut);
  FLBA plain_ascii_flba = FLBAFromString(plain_ascii);
  ASSERT_TRUE(less(with_umlaut_flba, plain_ascii_flba));
}
// Checks CompareUnsignedFLBA, constructed with the column's type length, on an
// ASCII pair and on a pair where one value contains a non-ASCII character.
// NOTE(review): some inputs are longer than the declared length of 10
// ("Bunkd123456" is 11 characters); presumably only the first type_length
// bytes are compared -- confirm in the comparator.
TEST(Comparison, UnsignedFLBA) {
  const int size = 10;
  NodePtr node = PrimitiveNode::Make("UnsignedFLBA", Repetition::REQUIRED,
                                     Type::FIXED_LEN_BYTE_ARRAY, LogicalType::NONE, size);
  ColumnDescriptor descr(node, 0, 0);
  CompareUnsignedFLBA uless(descr.type_length());

  // The values already differ in their first byte ('A' versus 'B').
  const std::string leading_a = "Anti123456";
  const std::string leading_b = "Bunkd123456";
  FLBA leading_a_flba = FLBAFromString(leading_a);
  FLBA leading_b_flba = FLBAFromString(leading_b);
  ASSERT_TRUE(uless(leading_a_flba, leading_b_flba));

  // Under the unsigned comparator the plain-ASCII value is expected to sort first.
  const std::string plain_ascii = "Bunk123456";
  const std::string with_umlaut = "Bünk123456";
  FLBA plain_ascii_flba = FLBAFromString(plain_ascii);
  FLBA with_umlaut_flba = FLBAFromString(with_umlaut);
  ASSERT_TRUE(uless(plain_ascii_flba, with_umlaut_flba));
}
// Checks CompareDefaultInt96 on three pairs whose first two words match:
// a smaller/larger last word, identical values, and a last word holding -14
// (stored as uint32_t) against 42.
TEST(Comparison, SignedInt96) {
  const parquet::Int96 a{{1, 41, 14}};
  const parquet::Int96 b{{1, 41, 42}};
  const parquet::Int96 aa{{1, 41, 14}};
  const parquet::Int96 bb{{1, 41, 14}};
  const parquet::Int96 aaa{{1, 41, static_cast<uint32_t>(-14)}};
  const parquet::Int96 bbb{{1, 41, 42}};

  NodePtr node = PrimitiveNode::Make("SignedInt96", Repetition::REQUIRED, Type::INT96);
  ColumnDescriptor descr(node, 0, 0);
  CompareDefaultInt96 less;

  // Strictly smaller last word.
  ASSERT_TRUE(less(a, b));
  // Equal values: neither may compare less than the other.
  ASSERT_FALSE(less(aa, bb));
  ASSERT_FALSE(less(bb, aa));
  // The -14 bit pattern is expected to sort below 42 under the signed comparator.
  ASSERT_TRUE(less(aaa, bbb));
}
// Checks CompareUnsignedInt96 on raw word triples and on values laid out as
// INT96 timestamps (words 0-1: nanoseconds within the day, word 2: Julian day).
TEST(Comparison, UnsignedInt96) {
  parquet::Int96 a{{1, 41, 14}};
  parquet::Int96 b{{1, static_cast<uint32_t>(-41), 42}};
  parquet::Int96 aa{{1, 41, 14}};
  parquet::Int96 bb{{1, 41, static_cast<uint32_t>(-14)}};
  // Initialised up front so no word is ever read while indeterminate.
  parquet::Int96 aaa{{0, 0, 0}};
  parquet::Int96 bbb{{0, 0, 0}};

  NodePtr node = PrimitiveNode::Make("UnsignedInt96", Repetition::REQUIRED, Type::INT96);
  ColumnDescriptor descr(node, 0, 0);
  CompareUnsignedInt96 uless;

  ASSERT_TRUE(uless(a, b));
  ASSERT_TRUE(uless(aa, bb));

  // Fills `ts` as an INT96 timestamp. The 64-bit nanosecond count is copied
  // into words 0-1 with memcpy (host byte order) rather than stored through a
  // reinterpret_cast<uint64_t*>: the storage is uint32_t[3], so such a store
  // would break the strict-aliasing rule and could be misaligned.
  const auto set_timestamp = [](parquet::Int96* ts, uint32_t julian_day,
                                uint64_t nanos_of_day) {
    ts->value[2] = julian_day;
    std::memcpy(&ts->value[0], &nanos_of_day, sizeof(nanos_of_day));
  };

  // Different days: the earlier day is expected to sort first even though its
  // time of day is later.
  set_timestamp(&aaa, 2451545, 45296000000000);  // 2000-01-01 12:34:56
  set_timestamp(&bbb, 2451546, 45290000000000);  // 2000-01-02 12:34:50
  ASSERT_TRUE(uless(aaa, bbb));

  // Same day, times about an hour apart.
  set_timestamp(&aaa, 2451545, 41696000000000);  // 2000-01-01 11:34:56
  set_timestamp(&bbb, 2451545, 45290000000000);  // 2000-01-01 12:34:50
  ASSERT_TRUE(uless(aaa, bbb));

  // Same day, times one second apart.
  set_timestamp(&aaa, 2451545, 45295000000000);  // 2000-01-01 12:34:55
  set_timestamp(&bbb, 2451545, 45296000000000);  // 2000-01-01 12:34:56
  ASSERT_TRUE(uless(aaa, bbb));
}
// Checks CompareDefaultInt64 on a smaller/larger pair, an equal pair and a
// negative/positive pair.
TEST(Comparison, SignedInt64) {
  const int64_t a = 1;
  const int64_t b = 4;
  const int64_t aa = 1;
  const int64_t bb = 1;
  const int64_t aaa = -1;
  const int64_t bbb = 1;

  NodePtr node = PrimitiveNode::Make("SignedInt64", Repetition::REQUIRED, Type::INT64);
  ColumnDescriptor descr(node, 0, 0);
  CompareDefaultInt64 less;

  ASSERT_TRUE(less(a, b));
  // Equal values: neither may compare less than the other.
  ASSERT_FALSE(less(aa, bb));
  ASSERT_FALSE(less(bb, aa));
  // -1 is expected to sort below 1 under the signed comparator.
  ASSERT_TRUE(less(aaa, bbb));
}
// Checks CompareUnsignedInt64 on a smaller/larger pair, an equal pair and a
// pair where the larger value is the all-ones bit pattern.
TEST(Comparison, UnsignedInt64) {
  const uint64_t a = 1;
  const uint64_t b = 4;
  const uint64_t aa = 1;
  const uint64_t bb = 1;
  const uint64_t aaa = 1;
  // -1 converted to uint64_t is the largest representable value.
  const uint64_t bbb = static_cast<uint64_t>(-1);

  NodePtr node = PrimitiveNode::Make("UnsignedInt64", Repetition::REQUIRED, Type::INT64);
  ColumnDescriptor descr(node, 0, 0);
  CompareUnsignedInt64 uless;

  ASSERT_TRUE(uless(a, b));
  // Equal values: neither may compare less than the other.
  ASSERT_FALSE(uless(aa, bb));
  ASSERT_FALSE(uless(bb, aa));
  // 1 is expected to sort below the all-ones value under the unsigned comparator.
  ASSERT_TRUE(uless(aaa, bbb));
}
// Checks CompareUnsignedInt32 on a smaller/larger pair, an equal pair and a
// pair where the larger value is the all-ones bit pattern.
TEST(Comparison, UnsignedInt32) {
  const uint32_t a = 1;
  const uint32_t b = 4;
  const uint32_t aa = 1;
  const uint32_t bb = 1;
  const uint32_t aaa = 1;
  // -1 converted to uint32_t is the largest representable value.
  const uint32_t bbb = static_cast<uint32_t>(-1);

  NodePtr node = PrimitiveNode::Make("UnsignedInt32", Repetition::REQUIRED, Type::INT32);
  ColumnDescriptor descr(node, 0, 0);
  CompareUnsignedInt32 uless;

  ASSERT_TRUE(uless(a, b));
  // Equal values: neither may compare less than the other.
  ASSERT_FALSE(uless(aa, bb));
  ASSERT_FALSE(uless(bb, aa));
  // 1 is expected to sort below the all-ones value under the unsigned comparator.
  ASSERT_TRUE(uless(aaa, bbb));
}
// Checks that Comparator::Make throws ParquetException for a 12-byte
// FIXED_LEN_BYTE_ARRAY column annotated with LogicalType::INTERVAL (the case
// the test name calls an unknown sort order).
TEST(Comparison, UnknownSortOrder) {
  NodePtr interval_node =
      PrimitiveNode::Make("Unknown", Repetition::REQUIRED, Type::FIXED_LEN_BYTE_ARRAY,
                          LogicalType::INTERVAL, 12);
  ColumnDescriptor interval_descr(interval_node, 0, 0);

  // Building a comparator for this column must fail rather than return one.
  ASSERT_THROW(Comparator::Make(&interval_descr), ParquetException);
}
} // namespace test
} // namespace parquet