blob: 8906f691f1a398fec289f91a12d1a8a5f49dd852 [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#include <gtest/gtest.h>

#include <cstdlib>
#include <cstring>
#include <functional>
#include <iosfwd>
#include <memory>
#include <sstream>
#include <string>
#include <vector>

#include "arrow/util/checked_cast.h"
#include "parquet/exception.h"
#include "parquet/schema.h"
#include "parquet/schema_internal.h"
#include "parquet/thrift_internal.h"
#include "parquet/types.h"
using ::arrow::internal::checked_cast;
namespace parquet {
using format::FieldRepetitionType;
using format::SchemaElement;
namespace schema {
// Builds a format::SchemaElement for a primitive column carrying the given
// name, repetition type and physical type.
//
// NOTE: `id` is accepted but is not stored in the returned element.
static inline SchemaElement NewPrimitive(const std::string& name,
                                         FieldRepetitionType::type repetition,
                                         Type::type type, int id = 0) {
  SchemaElement element;
  element.__set_name(name);
  element.__set_repetition_type(repetition);
  // Translate the parquet::Type enum into its format:: counterpart.
  const auto format_type = static_cast<format::Type::type>(type);
  element.__set_type(format_type);
  return element;
}
// Builds a format::SchemaElement for a group carrying the given name,
// repetition type and declared number of children.
//
// NOTE: `id` is accepted but is not stored in the returned element.
static inline SchemaElement NewGroup(const std::string& name,
                                     FieldRepetitionType::type repetition,
                                     int num_children, int id = 0) {
  SchemaElement element;
  element.__set_name(name);
  element.__set_repetition_type(repetition);
  element.__set_num_children(num_children);
  return element;
}
// ----------------------------------------------------------------------
// ColumnPath
// Exercises ColumnPath construction from a vector and from a dotted string, as
// well as extending an existing path by one component.
TEST(TestColumnPath, TestAttrs) {
  const std::vector<std::string> components({"toplevel", "leaf"});
  ColumnPath from_vector(components);
  ASSERT_EQ(from_vector.ToDotString(), "toplevel.leaf");

  std::shared_ptr<ColumnPath> from_string = ColumnPath::FromDotString("toplevel.leaf");
  ASSERT_EQ(from_string->ToDotString(), "toplevel.leaf");

  std::shared_ptr<ColumnPath> longer = from_string->extend("anotherlevel");
  ASSERT_EQ(longer->ToDotString(), "toplevel.leaf.anotherlevel");
}
// ----------------------------------------------------------------------
// Primitive node
// Fixture for PrimitiveNode tests. Holds the node produced by the most recent
// Convert() call together with a typed, non-owning view of it.
class TestPrimitiveNode : public ::testing::Test {
 public:
  // Invoked by GoogleTest before every test body.
  void SetUp() override {
    name_ = "name";
    id_ = 5;
  }

  // Converts `element` through PrimitiveNode::FromParquet using id_ and stores
  // the result in node_ / prim_node_. Uses fatal assertions, so call it via
  // ASSERT_NO_FATAL_FAILURE.
  void Convert(const format::SchemaElement* element) {
    // Drop the typed view first: replacing node_ below destroys the node it
    // may still point to.
    prim_node_ = nullptr;
    node_ = PrimitiveNode::FromParquet(element, id_);
    ASSERT_TRUE(node_ != nullptr);
    ASSERT_TRUE(node_->is_primitive());
    prim_node_ = static_cast<const PrimitiveNode*>(node_.get());
  }

 protected:
  std::string name_;
  // Non-owning pointer into node_; null until Convert() succeeds.
  const PrimitiveNode* prim_node_ = nullptr;
  int id_ = 0;
  std::unique_ptr<Node> node_;
};
// Checks the basic accessors of directly constructed PrimitiveNodes.
TEST_F(TestPrimitiveNode, Attrs) {
  PrimitiveNode int_node("foo", Repetition::REPEATED, Type::INT32);
  PrimitiveNode utf8_node("bar", Repetition::OPTIONAL, Type::BYTE_ARRAY,
                          ConvertedType::UTF8);

  // name and node kind
  ASSERT_EQ("foo", int_node.name());
  ASSERT_TRUE(int_node.is_primitive());
  ASSERT_FALSE(int_node.is_group());

  // repetition, node type and physical type
  ASSERT_EQ(Repetition::REPEATED, int_node.repetition());
  ASSERT_EQ(Repetition::OPTIONAL, utf8_node.repetition());
  ASSERT_EQ(Node::PRIMITIVE, int_node.node_type());
  ASSERT_EQ(Type::INT32, int_node.physical_type());
  ASSERT_EQ(Type::BYTE_ARRAY, utf8_node.physical_type());

  // logical types
  ASSERT_EQ(ConvertedType::NONE, int_node.converted_type());
  ASSERT_EQ(ConvertedType::UTF8, utf8_node.converted_type());

  // repetition predicates
  PrimitiveNode repeated_node("foo", Repetition::REPEATED, Type::INT32);
  PrimitiveNode required_node("foo", Repetition::REQUIRED, Type::INT32);
  PrimitiveNode optional_node("foo", Repetition::OPTIONAL, Type::INT32);
  ASSERT_TRUE(repeated_node.is_repeated());
  ASSERT_FALSE(repeated_node.is_optional());
  ASSERT_TRUE(required_node.is_required());
  ASSERT_TRUE(optional_node.is_optional());
  ASSERT_FALSE(optional_node.is_required());
}
// Converts several hand-built SchemaElements and checks the resulting
// PrimitiveNode attributes.
TEST_F(TestPrimitiveNode, FromParquet) {
  // Plain optional INT32
  SchemaElement element =
      NewPrimitive(name_, FieldRepetitionType::OPTIONAL, Type::INT32, 0);
  ASSERT_NO_FATAL_FAILURE(Convert(&element));
  ASSERT_EQ(name_, prim_node_->name());
  ASSERT_EQ(id_, prim_node_->id());
  ASSERT_EQ(Repetition::OPTIONAL, prim_node_->repetition());
  ASSERT_EQ(Type::INT32, prim_node_->physical_type());
  ASSERT_EQ(ConvertedType::NONE, prim_node_->converted_type());

  // Test a logical type
  element = NewPrimitive(name_, FieldRepetitionType::REQUIRED, Type::BYTE_ARRAY, 0);
  element.__set_converted_type(format::ConvertedType::UTF8);
  ASSERT_NO_FATAL_FAILURE(Convert(&element));
  ASSERT_EQ(Repetition::REQUIRED, prim_node_->repetition());
  ASSERT_EQ(Type::BYTE_ARRAY, prim_node_->physical_type());
  ASSERT_EQ(ConvertedType::UTF8, prim_node_->converted_type());

  // FIXED_LEN_BYTE_ARRAY
  element =
      NewPrimitive(name_, FieldRepetitionType::OPTIONAL, Type::FIXED_LEN_BYTE_ARRAY, 0);
  element.__set_type_length(16);
  ASSERT_NO_FATAL_FAILURE(Convert(&element));
  ASSERT_EQ(name_, prim_node_->name());
  ASSERT_EQ(id_, prim_node_->id());
  ASSERT_EQ(Repetition::OPTIONAL, prim_node_->repetition());
  ASSERT_EQ(Type::FIXED_LEN_BYTE_ARRAY, prim_node_->physical_type());
  ASSERT_EQ(16, prim_node_->type_length());

  // format::ConvertedType::Decimal
  element =
      NewPrimitive(name_, FieldRepetitionType::OPTIONAL, Type::FIXED_LEN_BYTE_ARRAY, 0);
  element.__set_converted_type(format::ConvertedType::DECIMAL);
  element.__set_type_length(6);
  element.__set_scale(2);
  element.__set_precision(12);
  ASSERT_NO_FATAL_FAILURE(Convert(&element));
  ASSERT_EQ(Type::FIXED_LEN_BYTE_ARRAY, prim_node_->physical_type());
  ASSERT_EQ(ConvertedType::DECIMAL, prim_node_->converted_type());
  ASSERT_EQ(6, prim_node_->type_length());
  ASSERT_EQ(2, prim_node_->decimal_metadata().scale);
  ASSERT_EQ(12, prim_node_->decimal_metadata().precision);
}
// Checks PrimitiveNode::Equals against nodes differing in one attribute each.
TEST_F(TestPrimitiveNode, Equals) {
  PrimitiveNode base("foo", Repetition::REQUIRED, Type::INT32);
  PrimitiveNode other_type("foo", Repetition::REQUIRED, Type::INT64);
  PrimitiveNode other_name("bar", Repetition::REQUIRED, Type::INT32);
  PrimitiveNode other_repetition("foo", Repetition::OPTIONAL, Type::INT32);
  PrimitiveNode same_as_base("foo", Repetition::REQUIRED, Type::INT32);

  ASSERT_TRUE(base.Equals(&base));
  ASSERT_FALSE(base.Equals(&other_type));
  ASSERT_FALSE(base.Equals(&other_name));
  ASSERT_FALSE(base.Equals(&other_repetition));
  ASSERT_TRUE(base.Equals(&same_as_base));

  // FIXED_LEN_BYTE_ARRAY nodes
  PrimitiveNode flba_base("foo", Repetition::REQUIRED, Type::FIXED_LEN_BYTE_ARRAY,
                          ConvertedType::DECIMAL, 12, 4, 2);

  // Constructed with length 1, then changed to 12 through the setter.
  PrimitiveNode flba_same_length("foo", Repetition::REQUIRED,
                                 Type::FIXED_LEN_BYTE_ARRAY, ConvertedType::DECIMAL, 1,
                                 4, 2);
  flba_same_length.SetTypeLength(12);

  // Constructed with length 1, then changed to 16 through the setter.
  PrimitiveNode flba_other_length("foo", Repetition::REQUIRED,
                                  Type::FIXED_LEN_BYTE_ARRAY, ConvertedType::DECIMAL, 1,
                                  4, 2);
  flba_other_length.SetTypeLength(16);

  PrimitiveNode flba_other_scale("foo", Repetition::REQUIRED,
                                 Type::FIXED_LEN_BYTE_ARRAY, ConvertedType::DECIMAL, 12,
                                 4, 0);
  PrimitiveNode flba_no_converted_type("foo", Repetition::REQUIRED,
                                       Type::FIXED_LEN_BYTE_ARRAY, ConvertedType::NONE,
                                       12, 4, 0);

  ASSERT_TRUE(flba_base.Equals(&flba_same_length));
  ASSERT_FALSE(flba_base.Equals(&flba_other_length));
  ASSERT_FALSE(flba_base.Equals(&flba_other_scale));
  ASSERT_FALSE(flba_base.Equals(&flba_no_converted_type));
}
// Checks which physical type / converted type combinations PrimitiveNode::Make
// accepts and which ones it rejects with a ParquetException.
TEST_F(TestPrimitiveNode, PhysicalLogicalMapping) {
  const Repetition::type required = Repetition::REQUIRED;

  // Integer, JSON and timestamp annotations
  ASSERT_NO_THROW(
      PrimitiveNode::Make("foo", required, Type::INT32, ConvertedType::INT_32));
  ASSERT_NO_THROW(
      PrimitiveNode::Make("foo", required, Type::BYTE_ARRAY, ConvertedType::JSON));
  ASSERT_THROW(PrimitiveNode::Make("foo", required, Type::INT32, ConvertedType::JSON),
               ParquetException);
  ASSERT_NO_THROW(PrimitiveNode::Make("foo", required, Type::INT64,
                                      ConvertedType::TIMESTAMP_MILLIS));
  ASSERT_THROW(PrimitiveNode::Make("foo", required, Type::INT32, ConvertedType::INT_64),
               ParquetException);
  ASSERT_THROW(
      PrimitiveNode::Make("foo", required, Type::BYTE_ARRAY, ConvertedType::INT_8),
      ParquetException);

  // INTERVAL and ENUM annotations on byte array types
  ASSERT_THROW(
      PrimitiveNode::Make("foo", required, Type::BYTE_ARRAY, ConvertedType::INTERVAL),
      ParquetException);
  ASSERT_THROW(PrimitiveNode::Make("foo", required, Type::FIXED_LEN_BYTE_ARRAY,
                                   ConvertedType::ENUM),
               ParquetException);
  ASSERT_NO_THROW(
      PrimitiveNode::Make("foo", required, Type::BYTE_ARRAY, ConvertedType::ENUM));

  // DECIMAL with various (length, precision, scale) triples
  ASSERT_THROW(PrimitiveNode::Make("foo", required, Type::FIXED_LEN_BYTE_ARRAY,
                                   ConvertedType::DECIMAL, 0, 2, 4),
               ParquetException);
  ASSERT_THROW(PrimitiveNode::Make("foo", required, Type::FLOAT, ConvertedType::DECIMAL,
                                   0, 2, 4),
               ParquetException);
  ASSERT_THROW(PrimitiveNode::Make("foo", required, Type::FIXED_LEN_BYTE_ARRAY,
                                   ConvertedType::DECIMAL, 0, 4, 0),
               ParquetException);
  ASSERT_THROW(PrimitiveNode::Make("foo", required, Type::FIXED_LEN_BYTE_ARRAY,
                                   ConvertedType::DECIMAL, 10, 0, 4),
               ParquetException);
  ASSERT_THROW(PrimitiveNode::Make("foo", required, Type::FIXED_LEN_BYTE_ARRAY,
                                   ConvertedType::DECIMAL, 10, 4, -1),
               ParquetException);
  ASSERT_THROW(PrimitiveNode::Make("foo", required, Type::FIXED_LEN_BYTE_ARRAY,
                                   ConvertedType::DECIMAL, 10, 2, 4),
               ParquetException);
  ASSERT_NO_THROW(PrimitiveNode::Make("foo", required, Type::FIXED_LEN_BYTE_ARRAY,
                                      ConvertedType::DECIMAL, 10, 6, 4));

  // INTERVAL on FIXED_LEN_BYTE_ARRAY: length 12 is accepted, length 10 is not
  ASSERT_NO_THROW(PrimitiveNode::Make("foo", required, Type::FIXED_LEN_BYTE_ARRAY,
                                      ConvertedType::INTERVAL, 12));
  ASSERT_THROW(PrimitiveNode::Make("foo", required, Type::FIXED_LEN_BYTE_ARRAY,
                                   ConvertedType::INTERVAL, 10),
               ParquetException);
}
// ----------------------------------------------------------------------
// Group node
// Fixture providing canned field lists for the GroupNode tests.
class TestGroupNode : public ::testing::Test {
 public:
  // Three fields with distinct names.
  NodeVector Fields1() {
    NodeVector result;
    result.push_back(Int32("one", Repetition::REQUIRED));
    result.push_back(Int64("two"));
    result.push_back(Double("three"));
    return result;
  }

  // Three fields, two of which share the name "duplicate".
  NodeVector Fields2() {
    NodeVector result;
    result.push_back(Int32("duplicate", Repetition::REQUIRED));
    result.push_back(Int64("unique"));
    result.push_back(Double("duplicate"));
    return result;
  }
};
// Checks the basic accessors of directly constructed GroupNodes.
TEST_F(TestGroupNode, Attrs) {
  NodeVector fields = Fields1();
  GroupNode node1("foo", Repetition::REPEATED, fields);
  GroupNode node2("bar", Repetition::OPTIONAL, fields, ConvertedType::LIST);

  ASSERT_EQ("foo", node1.name());
  ASSERT_TRUE(node1.is_group());
  ASSERT_FALSE(node1.is_primitive());

  // Compare as int so the assertion does not mix signed and unsigned operands.
  ASSERT_EQ(static_cast<int>(fields.size()), node1.field_count());

  ASSERT_TRUE(node1.is_repeated());
  ASSERT_TRUE(node2.is_optional());
  ASSERT_EQ(Repetition::REPEATED, node1.repetition());
  ASSERT_EQ(Repetition::OPTIONAL, node2.repetition());
  ASSERT_EQ(Node::GROUP, node1.node_type());

  // logical types
  ASSERT_EQ(ConvertedType::NONE, node1.converted_type());
  ASSERT_EQ(ConvertedType::LIST, node2.converted_type());
}
// Checks GroupNode::Equals for groups differing by name or by field list.
TEST_F(TestGroupNode, Equals) {
  NodeVector first_fields = Fields1();
  NodeVector second_fields = Fields1();

  GroupNode reference("group", Repetition::REPEATED, first_fields);
  GroupNode same_fields("group", Repetition::REPEATED, second_fields);
  GroupNode renamed("group2", Repetition::REPEATED, second_fields);

  // This is copied in the GroupNode ctor, so this is okay
  second_fields.push_back(Float("four", Repetition::OPTIONAL));
  GroupNode extra_field("group", Repetition::REPEATED, second_fields);
  GroupNode from_temporary("group", Repetition::REPEATED, Fields1());

  ASSERT_TRUE(reference.Equals(&reference));
  ASSERT_TRUE(reference.Equals(&same_fields));
  ASSERT_FALSE(reference.Equals(&renamed));
  ASSERT_FALSE(reference.Equals(&extra_field));
  ASSERT_FALSE(from_temporary.Equals(&extra_field));
}
// Checks that GroupNode::FieldIndex returns each field's position and a
// negative value for nodes that are not fields of the group.
TEST_F(TestGroupNode, FieldIndex) {
  NodeVector fields = Fields1();
  GroupNode group("group", Repetition::REQUIRED, fields);

  // Iterate with an int index: both field() and FieldIndex() work with int, so
  // this avoids a signed/unsigned comparison inside ASSERT_EQ.
  const int num_fields = static_cast<int>(fields.size());
  for (int i = 0; i < num_fields; ++i) {
    auto field = group.field(i);
    ASSERT_EQ(i, group.FieldIndex(*field));
  }

  // Test a non field node
  auto non_field_alien = Int32("alien", Repetition::REQUIRED);   // other name
  auto non_field_familiar = Int32("one", Repetition::REPEATED);  // other node
  ASSERT_LT(group.FieldIndex(*non_field_alien), 0);
  ASSERT_LT(group.FieldIndex(*non_field_familiar), 0);
}
// Checks that GroupNode::FieldIndex still returns each field's own position
// when two fields of the group share a name.
TEST_F(TestGroupNode, FieldIndexDuplicateName) {
  NodeVector fields = Fields2();
  GroupNode group("group", Repetition::REQUIRED, fields);

  // Iterate with an int index to avoid a signed/unsigned comparison inside
  // ASSERT_EQ.
  const int num_fields = static_cast<int>(fields.size());
  for (int i = 0; i < num_fields; ++i) {
    auto field = group.field(i);
    ASSERT_EQ(i, group.FieldIndex(*field));
  }
}
// ----------------------------------------------------------------------
// Test convert group
// Fixture for FlatSchemaConverter tests. Holds the converted tree from the most
// recent Convert() call together with a typed, non-owning view of its root.
class TestSchemaConverter : public ::testing::Test {
 public:
  // GoogleTest only invokes the hook spelled `SetUp`; `override` makes the
  // compiler reject any misspelling.
  void SetUp() override { name_ = "parquet_schema"; }

  // Converts `length` flattened schema elements into a node tree and stores the
  // result in node_ / group_. Uses a fatal assertion, so call it via
  // ASSERT_NO_FATAL_FAILURE when the conversion is expected to succeed.
  void Convert(const parquet::format::SchemaElement* elements, int length) {
    // Drop the typed view first so it never refers to a node that gets
    // replaced below.
    group_ = nullptr;
    FlatSchemaConverter converter(elements, length);
    node_ = converter.Convert();
    ASSERT_TRUE(node_->is_group());
    group_ = static_cast<const GroupNode*>(node_.get());
  }

 protected:
  std::string name_;
  // Non-owning pointer into node_; null until Convert() succeeds.
  const GroupNode* group_ = nullptr;
  std::unique_ptr<Node> node_;
};
bool check_for_parent_consistency(const GroupNode* node) {
// Each node should have the group as parent
for (int i = 0; i < node->field_count(); i++) {
const NodePtr& field = node->field(i);
if (field->parent() != node) {
return false;
}
if (field->is_group()) {
const GroupNode* group = static_cast<GroupNode*>(field.get());
if (!check_for_parent_consistency(group)) {
return false;
}
}
}
return true;
}
// Converts a hand-flattened nested schema and compares the result with the
// same schema built directly from nodes.
TEST_F(TestSchemaConverter, NestedExample) {
  std::vector<SchemaElement> flat;
  flat.push_back(NewGroup(name_, FieldRepetitionType::REPEATED, 2, 0));

  // A primitive one
  flat.push_back(NewPrimitive("a", FieldRepetitionType::REQUIRED, Type::INT32, 1));

  // A group
  flat.push_back(NewGroup("bag", FieldRepetitionType::OPTIONAL, 1, 2));

  // 3-level list encoding, by hand
  SchemaElement list_element = NewGroup("b", FieldRepetitionType::REPEATED, 1, 3);
  list_element.__set_converted_type(format::ConvertedType::LIST);
  flat.push_back(list_element);
  flat.push_back(NewPrimitive("item", FieldRepetitionType::OPTIONAL, Type::INT64, 4));

  ASSERT_NO_FATAL_FAILURE(Convert(&flat[0], static_cast<int>(flat.size())));

  // Construct the expected schema
  NodeVector expected_fields;
  expected_fields.push_back(Int32("a", Repetition::REQUIRED));

  // 3-level list encoding
  NodePtr item = Int64("item");
  NodePtr list(GroupNode::Make("b", Repetition::REPEATED, {item}, ConvertedType::LIST));
  NodePtr bag(GroupNode::Make("bag", Repetition::OPTIONAL, {list}));
  expected_fields.push_back(bag);

  NodePtr expected = GroupNode::Make(name_, Repetition::REPEATED, expected_fields);
  ASSERT_TRUE(expected->Equals(group_));

  // Check that the parent relationship in each node is consistent
  ASSERT_EQ(group_->parent(), nullptr);
  ASSERT_TRUE(check_for_parent_consistency(group_));
}
// A schema consisting only of a root group with no children must convert
// without throwing.
TEST_F(TestSchemaConverter, ZeroColumns) {
  // ARROW-3843
  SchemaElement root_only[1];
  SchemaElement& root = root_only[0];
  root = NewGroup("schema", FieldRepetitionType::REPEATED, 0, 0);
  ASSERT_NO_THROW(Convert(root_only, 1));
}
// Checks how the converter treats different kinds of root elements.
TEST_F(TestSchemaConverter, InvalidRoot) {
  // According to the Parquet specification, the first element in the
  // list<SchemaElement> is a group whose children (and their descendants)
  // contain all of the rest of the flattened schema elements. If the first
  // element is not a group, it is a malformed Parquet file.
  SchemaElement flat[2];
  SchemaElement& root = flat[0];
  SchemaElement& child = flat[1];

  root = NewPrimitive("not-a-group", FieldRepetitionType::REQUIRED, Type::INT32, 0);
  ASSERT_THROW(Convert(flat, 2), ParquetException);

  // While the Parquet spec indicates that the root group should have REPEATED
  // repetition type, some implementations may return REQUIRED or OPTIONAL
  // groups as the first element. These tests check that this is okay as a
  // practicality matter.
  root = NewGroup("not-repeated", FieldRepetitionType::REQUIRED, 1, 0);
  child = NewPrimitive("a", FieldRepetitionType::REQUIRED, Type::INT32, 1);
  ASSERT_NO_FATAL_FAILURE(Convert(flat, 2));

  root = NewGroup("not-repeated", FieldRepetitionType::OPTIONAL, 1, 0);
  ASSERT_NO_FATAL_FAILURE(Convert(flat, 2));
}
// A root group that declares two children while no further elements are
// supplied must be rejected with a ParquetException.
TEST_F(TestSchemaConverter, NotEnoughChildren) {
  // Throw a ParquetException, but don't core dump or anything
  std::vector<SchemaElement> elements;
  elements.push_back(NewGroup(name_, FieldRepetitionType::REPEATED, 2, 0));
  ASSERT_THROW(Convert(elements.data(), 1), ParquetException);
}
// ----------------------------------------------------------------------
// Schema tree flatten / unflatten
// Fixture for tests that flatten a node tree into a list of
// format::SchemaElement via ToParquet.
class TestSchemaFlatten : public ::testing::Test {
 public:
  // GoogleTest only invokes the hook spelled `SetUp`; `override` makes the
  // compiler reject any misspelling.
  void SetUp() override { name_ = "parquet_schema"; }

  // Flattens `schema` into elements_ by calling ToParquet.
  void Flatten(const GroupNode* schema) { ToParquet(schema, &elements_); }

 protected:
  std::string name_;
  std::vector<format::SchemaElement> elements_;
};
// Flattens single-column groups and checks whether precision/scale are marked
// as set on the flattened leaf element (index 1; index 0 is the group itself).
TEST_F(TestSchemaFlatten, DecimalMetadata) {
  // Checks that DecimalMetadata is only set for DecimalTypes
  NodePtr node = PrimitiveNode::Make("decimal", Repetition::REQUIRED, Type::INT64,
                                     ConvertedType::DECIMAL, -1, 8, 4);
  NodePtr group =
      GroupNode::Make("group", Repetition::REPEATED, {node}, ConvertedType::LIST);
  Flatten(static_cast<GroupNode*>(group.get()));
  // Guard the indexing below.
  ASSERT_GE(elements_.size(), 2u);
  ASSERT_EQ("decimal", elements_[1].name);
  ASSERT_TRUE(elements_[1].__isset.precision);
  ASSERT_TRUE(elements_[1].__isset.scale);
  elements_.clear();

  // ... including those created with new logical types
  node = PrimitiveNode::Make("decimal", Repetition::REQUIRED,
                             DecimalLogicalType::Make(10, 5), Type::INT64, -1);
  group = GroupNode::Make("group", Repetition::REPEATED, {node}, ListLogicalType::Make());
  Flatten(static_cast<GroupNode*>(group.get()));
  ASSERT_GE(elements_.size(), 2u);
  ASSERT_EQ("decimal", elements_[1].name);
  ASSERT_TRUE(elements_[1].__isset.precision);
  ASSERT_TRUE(elements_[1].__isset.scale);
  elements_.clear();

  // Not for integers with no logical type
  group = GroupNode::Make("group", Repetition::REPEATED, {Int64("int64")},
                          ConvertedType::LIST);
  Flatten(static_cast<GroupNode*>(group.get()));
  ASSERT_GE(elements_.size(), 2u);
  ASSERT_EQ("int64", elements_[1].name);
  // Inspect the int64 leaf (index 1), not the enclosing group (index 0).
  ASSERT_FALSE(elements_[1].__isset.precision);
  ASSERT_FALSE(elements_[1].__isset.scale);
}
// Flattens a nested schema built from nodes and compares it, element by
// element, with the hand-written flattened form.
TEST_F(TestSchemaFlatten, NestedExample) {
  std::vector<SchemaElement> expected;
  expected.push_back(NewGroup(name_, FieldRepetitionType::REPEATED, 2, 0));

  // A primitive one
  expected.push_back(NewPrimitive("a", FieldRepetitionType::REQUIRED, Type::INT32, 1));

  // A group
  expected.push_back(NewGroup("bag", FieldRepetitionType::OPTIONAL, 1, 2));

  // 3-level list encoding, by hand
  SchemaElement list_element = NewGroup("b", FieldRepetitionType::REPEATED, 1, 3);
  list_element.__set_converted_type(format::ConvertedType::LIST);
  format::ListType list_type;
  format::LogicalType logical_type;
  logical_type.__set_LIST(list_type);
  list_element.__set_logicalType(logical_type);
  expected.push_back(list_element);
  expected.push_back(
      NewPrimitive("item", FieldRepetitionType::OPTIONAL, Type::INT64, 4));

  // Construct the schema
  NodeVector fields;
  fields.push_back(Int32("a", Repetition::REQUIRED));

  // 3-level list encoding
  NodePtr item = Int64("item");
  NodePtr list(GroupNode::Make("b", Repetition::REPEATED, {item}, ConvertedType::LIST));
  NodePtr bag(GroupNode::Make("bag", Repetition::OPTIONAL, {list}));
  fields.push_back(bag);

  NodePtr schema = GroupNode::Make(name_, Repetition::REPEATED, fields);
  Flatten(static_cast<GroupNode*>(schema.get()));

  ASSERT_EQ(elements_.size(), expected.size());
  for (size_t idx = 0; idx < elements_.size(); ++idx) {
    ASSERT_EQ(elements_[idx], expected[idx]);
  }
}
// Checks ColumnDescriptor accessors and its ToString() output for a UTF8
// BYTE_ARRAY column and for a DECIMAL FIXED_LEN_BYTE_ARRAY column.
TEST(TestColumnDescriptor, TestAttrs) {
  NodePtr node = PrimitiveNode::Make("name", Repetition::OPTIONAL, Type::BYTE_ARRAY,
                                     ConvertedType::UTF8);
  ColumnDescriptor utf8_descr(node, 4, 1);

  ASSERT_EQ("name", utf8_descr.name());
  ASSERT_EQ(4, utf8_descr.max_definition_level());
  ASSERT_EQ(1, utf8_descr.max_repetition_level());
  ASSERT_EQ(Type::BYTE_ARRAY, utf8_descr.physical_type());
  ASSERT_EQ(-1, utf8_descr.type_length());

  const char* expected_descr = R"(column descriptor = {
name: name,
path: ,
physical_type: BYTE_ARRAY,
converted_type: UTF8,
logical_type: String,
max_definition_level: 4,
max_repetition_level: 1,
})";
  ASSERT_EQ(expected_descr, utf8_descr.ToString());

  // Test FIXED_LEN_BYTE_ARRAY
  node = PrimitiveNode::Make("name", Repetition::OPTIONAL, Type::FIXED_LEN_BYTE_ARRAY,
                             ConvertedType::DECIMAL, 12, 10, 4);
  ColumnDescriptor decimal_descr(node, 4, 1);

  ASSERT_EQ(Type::FIXED_LEN_BYTE_ARRAY, decimal_descr.physical_type());
  ASSERT_EQ(12, decimal_descr.type_length());

  expected_descr = R"(column descriptor = {
name: name,
path: ,
physical_type: FIXED_LEN_BYTE_ARRAY,
converted_type: DECIMAL,
logical_type: Decimal(precision=10, scale=4),
max_definition_level: 4,
max_repetition_level: 1,
length: 12,
precision: 10,
scale: 4,
})";
  ASSERT_EQ(expected_descr, decimal_descr.ToString());
}
// Fixture owning a default-constructed SchemaDescriptor for each test.
class TestSchemaDescriptor : public ::testing::Test {
 public:
  // Nothing to prepare. Spelled `SetUp` (with `override`) so that it is the
  // hook GoogleTest actually calls if setup code is added here later.
  void SetUp() override {}

 protected:
  SchemaDescriptor descr_;
};
// Initializing a SchemaDescriptor with a primitive node must throw.
TEST_F(TestSchemaDescriptor, InitNonGroup) {
  NodePtr primitive = PrimitiveNode::Make("field", Repetition::OPTIONAL, Type::INT32);
  ASSERT_THROW(descr_.Init(primitive), ParquetException);
}
// Checks SchemaDescriptor::Equals for schemas differing in one aspect each,
// and ColumnDescriptor::Equals for differing max levels.
TEST_F(TestSchemaDescriptor, Equals) {
  NodePtr inta = Int32("a", Repetition::REQUIRED);
  NodePtr intb = Int64("b", Repetition::OPTIONAL);
  NodePtr intb2 = Int64("b2", Repetition::OPTIONAL);
  NodePtr intc = ByteArray("c", Repetition::REPEATED);

  NodePtr item1 = Int64("item1", Repetition::REQUIRED);
  NodePtr item2 = Boolean("item2", Repetition::OPTIONAL);
  NodePtr item3 = Int32("item3", Repetition::REPEATED);
  NodePtr list(GroupNode::Make("records", Repetition::REPEATED, {item1, item2, item3},
                               ConvertedType::LIST));

  NodePtr bag(GroupNode::Make("bag", Repetition::OPTIONAL, {list}));
  NodePtr bag2(GroupNode::Make("bag", Repetition::REQUIRED, {list}));

  SchemaDescriptor descr1;
  descr1.Init(GroupNode::Make("schema", Repetition::REPEATED, {inta, intb, intc, bag}));
  ASSERT_TRUE(descr1.Equals(descr1));

  // "bag" is REQUIRED instead of OPTIONAL
  SchemaDescriptor descr2;
  descr2.Init(GroupNode::Make("schema", Repetition::REPEATED, {inta, intb, intc, bag2}));
  ASSERT_FALSE(descr1.Equals(descr2));

  // Second column is named "b2" instead of "b"
  SchemaDescriptor descr3;
  descr3.Init(GroupNode::Make("schema", Repetition::REPEATED, {inta, intb2, intc, bag}));
  ASSERT_FALSE(descr1.Equals(descr3));

  // Robust to name of parent node
  SchemaDescriptor descr4;
  descr4.Init(GroupNode::Make("SCHEMA", Repetition::REPEATED, {inta, intb, intc, bag}));
  ASSERT_TRUE(descr1.Equals(descr4));

  // One additional trailing column
  SchemaDescriptor descr5;
  descr5.Init(
      GroupNode::Make("schema", Repetition::REPEATED, {inta, intb, intc, bag, intb2}));
  ASSERT_FALSE(descr1.Equals(descr5));

  // Different max repetition / definition levels
  ColumnDescriptor col1(inta, 5, 1);
  ColumnDescriptor col2(inta, 6, 1);
  ColumnDescriptor col3(inta, 5, 2);
  ASSERT_TRUE(col1.Equals(col1));
  ASSERT_FALSE(col1.Equals(col2));
  ASSERT_FALSE(col1.Equals(col3));
}
// Initializes a SchemaDescriptor from a nested schema and checks the leaf
// count, per-column max levels, dotted paths, column lookup and column roots.
TEST_F(TestSchemaDescriptor, BuildTree) {
  NodeVector top_level;
  NodePtr col_a = Int32("a", Repetition::REQUIRED);
  top_level.push_back(col_a);
  top_level.push_back(Int64("b", Repetition::OPTIONAL));
  top_level.push_back(ByteArray("c", Repetition::REPEATED));

  // 3-level list encoding
  NodePtr item1 = Int64("item1", Repetition::REQUIRED);
  NodePtr item2 = Boolean("item2", Repetition::OPTIONAL);
  NodePtr item3 = Int32("item3", Repetition::REPEATED);
  NodePtr records(GroupNode::Make("records", Repetition::REPEATED,
                                  {item1, item2, item3}, ConvertedType::LIST));
  NodePtr bag(GroupNode::Make("bag", Repetition::OPTIONAL, {records}));
  top_level.push_back(bag);

  NodePtr schema = GroupNode::Make("schema", Repetition::REPEATED, top_level);
  descr_.Init(schema);

  // 6 leaves
  const int num_leaves = 6;
  ASSERT_EQ(num_leaves, descr_.num_columns());

  //                             mdef mrep
  // required int32 a            0    0
  // optional int64 b            1    0
  // repeated byte_array c       1    1
  // optional group bag          1    0
  //   repeated group records    2    1
  //     required int64 item1    2    1
  //     optional boolean item2  3    1
  //     repeated int32 item3    3    2
  const int16_t expected_def_levels[6] = {0, 1, 1, 2, 3, 3};
  const int16_t expected_rep_levels[6] = {0, 0, 1, 1, 1, 2};
  for (int leaf = 0; leaf < num_leaves; ++leaf) {
    const ColumnDescriptor* column = descr_.Column(leaf);
    EXPECT_EQ(expected_def_levels[leaf], column->max_definition_level()) << leaf;
    EXPECT_EQ(expected_rep_levels[leaf], column->max_repetition_level()) << leaf;
  }

  // Dotted paths of each leaf
  ASSERT_EQ(descr_.Column(0)->path()->ToDotString(), "a");
  ASSERT_EQ(descr_.Column(1)->path()->ToDotString(), "b");
  ASSERT_EQ(descr_.Column(2)->path()->ToDotString(), "c");
  ASSERT_EQ(descr_.Column(3)->path()->ToDotString(), "bag.records.item1");
  ASSERT_EQ(descr_.Column(4)->path()->ToDotString(), "bag.records.item2");
  ASSERT_EQ(descr_.Column(5)->path()->ToDotString(), "bag.records.item3");

  // Looking a column up by its schema node yields its own index
  for (int leaf = 0; leaf < num_leaves; ++leaf) {
    auto column = descr_.Column(leaf);
    ASSERT_EQ(leaf, descr_.ColumnIndex(*column->schema_node()));
  }

  // Test non-column nodes find
  NodePtr non_column_alien = Int32("alien", Repetition::REQUIRED);  // other path
  NodePtr non_column_familiar = Int32("a", Repetition::REPEATED);   // other node
  ASSERT_LT(descr_.ColumnIndex(*non_column_alien), 0);
  ASSERT_LT(descr_.ColumnIndex(*non_column_familiar), 0);

  // Column roots
  ASSERT_EQ(col_a.get(), descr_.GetColumnRoot(0));
  ASSERT_EQ(bag.get(), descr_.GetColumnRoot(3));
  ASSERT_EQ(bag.get(), descr_.GetColumnRoot(4));
  ASSERT_EQ(bag.get(), descr_.GetColumnRoot(5));

  ASSERT_EQ(schema.get(), descr_.group_node());

  // Init clears the leaves
  descr_.Init(schema);
  ASSERT_EQ(num_leaves, descr_.num_columns());
}
// Returns the text that PrintSchema writes for `node`.
static std::string Print(const NodePtr& node) {
  // Only output is needed, so an ostringstream suffices.
  std::ostringstream out;
  PrintSchema(node.get(), out);
  return out.str();
}
// Prints a nested schema containing list and decimal annotations and compares
// the output with the expected text.
TEST(TestSchemaPrinter, Examples) {
  // Test schema 1
  NodeVector top_level;
  top_level.push_back(Int32("a", Repetition::REQUIRED));

  // 3-level list encoding
  NodePtr item1 = Int64("item1");
  NodePtr item2 = Boolean("item2", Repetition::REQUIRED);
  NodePtr list(
      GroupNode::Make("b", Repetition::REPEATED, {item1, item2}, ConvertedType::LIST));
  NodePtr bag(GroupNode::Make("bag", Repetition::OPTIONAL, {list}));
  top_level.push_back(bag);

  // Decimal columns: one declared via converted type, one via logical type
  top_level.push_back(PrimitiveNode::Make("c", Repetition::REQUIRED, Type::INT32,
                                          ConvertedType::DECIMAL, -1, 3, 2));
  top_level.push_back(PrimitiveNode::Make(
      "d", Repetition::REQUIRED, DecimalLogicalType::Make(10, 5), Type::INT64, -1));

  NodePtr schema = GroupNode::Make("schema", Repetition::REPEATED, top_level);
  std::string printed = Print(schema);

  std::string expected = R"(message schema {
required int32 a;
optional group bag {
repeated group b (List) {
optional int64 item1;
required boolean item2;
}
}
required int32 c (Decimal(precision=3, scale=2));
required int64 d (Decimal(precision=10, scale=5));
}
)";
  ASSERT_EQ(expected, printed);
}
// Asserts that the logical type obtained from
// LogicalType::FromConvertedType(converted_type) has the same type() as
// `from_make`, Equals() it, and that both satisfy `check_is_type`.
static void ConfirmFactoryEquivalence(
    ConvertedType::type converted_type,
    const std::shared_ptr<const LogicalType>& from_make,
    std::function<bool(const std::shared_ptr<const LogicalType>&)> check_is_type) {
  const std::shared_ptr<const LogicalType> from_converted_type =
      LogicalType::FromConvertedType(converted_type);

  // Same type tag
  ASSERT_EQ(from_converted_type->type(), from_make->type())
      << from_make->ToString() << " logical types unexpectedly do not match on type";

  // Equivalent objects
  ASSERT_TRUE(from_converted_type->Equals(*from_make))
      << from_make->ToString() << " logical types unexpectedly not equivalent";

  // Both satisfy the supplied type predicate
  ASSERT_TRUE(check_is_type(from_converted_type))
      << from_converted_type->ToString()
      << " logical type (from converted type) does not have expected type property";
  ASSERT_TRUE(check_is_type(from_make))
      << from_make->ToString()
      << " logical type (from Make()) does not have expected type property";
}
TEST(TestLogicalTypeConstruction, FactoryEquivalence) {
  // For each legacy converted type, ensure that the equivalent logical type object
  // can be obtained from either the base class's FromConvertedType() factory method or
  // the logical type type class's Make() method (accessed via convenience methods on the
  // base class) and that these logical type objects are equivalent
  using LogicalTypePtr = std::shared_ptr<const LogicalType>;
  using TypePredicate = std::function<bool(const LogicalTypePtr&)>;

  // One table row: converted type, equivalent logical type, type predicate.
  struct EquivalenceCase {
    ConvertedType::type converted_type;
    LogicalTypePtr logical_type;
    TypePredicate check_is_type;
  };

  // Predicates forwarding to the matching is_*() query.
  auto is_string = [](const LogicalTypePtr& t) { return t->is_string(); };
  auto is_map = [](const LogicalTypePtr& t) { return t->is_map(); };
  auto is_list = [](const LogicalTypePtr& t) { return t->is_list(); };
  auto is_enum = [](const LogicalTypePtr& t) { return t->is_enum(); };
  auto is_date = [](const LogicalTypePtr& t) { return t->is_date(); };
  auto is_time = [](const LogicalTypePtr& t) { return t->is_time(); };
  auto is_timestamp = [](const LogicalTypePtr& t) { return t->is_timestamp(); };
  auto is_int = [](const LogicalTypePtr& t) { return t->is_int(); };
  auto is_json = [](const LogicalTypePtr& t) { return t->is_JSON(); };
  auto is_bson = [](const LogicalTypePtr& t) { return t->is_BSON(); };
  auto is_interval = [](const LogicalTypePtr& t) { return t->is_interval(); };
  auto is_none = [](const LogicalTypePtr& t) { return t->is_none(); };

  std::vector<EquivalenceCase> table = {
      {ConvertedType::UTF8, LogicalType::String(), is_string},
      {ConvertedType::MAP, LogicalType::Map(), is_map},
      {ConvertedType::MAP_KEY_VALUE, LogicalType::Map(), is_map},
      {ConvertedType::LIST, LogicalType::List(), is_list},
      {ConvertedType::ENUM, LogicalType::Enum(), is_enum},
      {ConvertedType::DATE, LogicalType::Date(), is_date},
      {ConvertedType::TIME_MILLIS, LogicalType::Time(true, LogicalType::TimeUnit::MILLIS),
       is_time},
      {ConvertedType::TIME_MICROS, LogicalType::Time(true, LogicalType::TimeUnit::MICROS),
       is_time},
      {ConvertedType::TIMESTAMP_MILLIS,
       LogicalType::Timestamp(true, LogicalType::TimeUnit::MILLIS), is_timestamp},
      {ConvertedType::TIMESTAMP_MICROS,
       LogicalType::Timestamp(true, LogicalType::TimeUnit::MICROS), is_timestamp},
      {ConvertedType::UINT_8, LogicalType::Int(8, false), is_int},
      {ConvertedType::UINT_16, LogicalType::Int(16, false), is_int},
      {ConvertedType::UINT_32, LogicalType::Int(32, false), is_int},
      {ConvertedType::UINT_64, LogicalType::Int(64, false), is_int},
      {ConvertedType::INT_8, LogicalType::Int(8, true), is_int},
      {ConvertedType::INT_16, LogicalType::Int(16, true), is_int},
      {ConvertedType::INT_32, LogicalType::Int(32, true), is_int},
      {ConvertedType::INT_64, LogicalType::Int(64, true), is_int},
      {ConvertedType::JSON, LogicalType::JSON(), is_json},
      {ConvertedType::BSON, LogicalType::BSON(), is_bson},
      {ConvertedType::INTERVAL, LogicalType::Interval(), is_interval},
      {ConvertedType::NONE, LogicalType::None(), is_none}};

  for (const EquivalenceCase& entry : table) {
    ConfirmFactoryEquivalence(entry.converted_type, entry.logical_type,
                              entry.check_is_type);
  }

  // ConvertedType::DECIMAL, LogicalType::Decimal, is_decimal
  schema::DecimalMetadata decimal_metadata;
  decimal_metadata.isset = true;
  decimal_metadata.precision = 10;
  decimal_metadata.scale = 4;
  LogicalTypePtr decimal_from_converted =
      LogicalType::FromConvertedType(ConvertedType::DECIMAL, decimal_metadata);
  LogicalTypePtr decimal_from_make = LogicalType::Decimal(10, 4);
  ASSERT_EQ(decimal_from_converted->type(), decimal_from_make->type());
  ASSERT_TRUE(decimal_from_converted->Equals(*decimal_from_make));
  ASSERT_TRUE(decimal_from_converted->is_decimal());
  ASSERT_TRUE(decimal_from_make->is_decimal());
  ASSERT_TRUE(LogicalType::Decimal(16)->Equals(*LogicalType::Decimal(16, 0)));
}
// Asserts that `original` is valid, converts to `expected_converted_type`
// without setting decimal metadata, is compatible with the converted type it
// returns (but not with set decimal metadata), and that a logical type rebuilt
// from that converted type is valid and Equals() `original`.
static void ConfirmConvertedTypeCompatibility(
    const std::shared_ptr<const LogicalType>& original,
    ConvertedType::type expected_converted_type) {
  ASSERT_TRUE(original->is_valid())
      << original->ToString() << " logical type unexpectedly is not valid";

  // Convert to the legacy representation.
  schema::DecimalMetadata converted_decimal_metadata;
  ConvertedType::type converted_type =
      original->ToConvertedType(&converted_decimal_metadata);
  ASSERT_EQ(converted_type, expected_converted_type)
      << original->ToString()
      << " logical type unexpectedly returns incorrect converted type";
  ASSERT_FALSE(converted_decimal_metadata.isset)
      << original->ToString()
      << " logical type unexpectedly returns converted decimal metadata that is set";

  // Compatibility with what was just returned, and incompatibility with
  // decimal metadata that is set.
  ASSERT_TRUE(original->is_compatible(converted_type, converted_decimal_metadata))
      << original->ToString()
      << " logical type unexpectedly is incompatible with converted type and decimal "
         "metadata it returned";
  ASSERT_FALSE(original->is_compatible(converted_type, {true, 1, 1}))
      << original->ToString()
      << " logical type unexpectedly is compatible with converted decimal metadata that "
         "is set";
  ASSERT_TRUE(original->is_compatible(converted_type))
      << original->ToString()
      << " logical type unexpectedly is incompatible with converted type it returned";

  // Round trip back to a logical type.
  std::shared_ptr<const LogicalType> reconstructed =
      LogicalType::FromConvertedType(converted_type, converted_decimal_metadata);
  ASSERT_TRUE(reconstructed->is_valid()) << "Reconstructed " << reconstructed->ToString()
                                         << " logical type unexpectedly is not valid";
  ASSERT_TRUE(reconstructed->Equals(*original))
      << "Reconstructed logical type (" << reconstructed->ToString()
      << ") unexpectedly not equivalent to original logical type ("
      << original->ToString() << ")";
}
TEST(TestLogicalTypeConstruction, ConvertedTypeCompatibility) {
  // Every legacy converted type has an equivalent logical type. Verify that each
  // such logical type emits the matching converted type information and that this
  // information is sufficient to rebuild an equal logical type.
  struct RoundTripCase {
    std::shared_ptr<const LogicalType> logical_type;
    ConvertedType::type converted_type;
  };

  const std::vector<RoundTripCase> round_trip_cases = {
      {LogicalType::String(), ConvertedType::UTF8},
      {LogicalType::Map(), ConvertedType::MAP},
      {LogicalType::List(), ConvertedType::LIST},
      {LogicalType::Enum(), ConvertedType::ENUM},
      {LogicalType::Date(), ConvertedType::DATE},
      {LogicalType::Time(true, LogicalType::TimeUnit::MILLIS),
       ConvertedType::TIME_MILLIS},
      {LogicalType::Time(true, LogicalType::TimeUnit::MICROS),
       ConvertedType::TIME_MICROS},
      {LogicalType::Timestamp(true, LogicalType::TimeUnit::MILLIS),
       ConvertedType::TIMESTAMP_MILLIS},
      {LogicalType::Timestamp(true, LogicalType::TimeUnit::MICROS),
       ConvertedType::TIMESTAMP_MICROS},
      {LogicalType::Int(8, false), ConvertedType::UINT_8},
      {LogicalType::Int(16, false), ConvertedType::UINT_16},
      {LogicalType::Int(32, false), ConvertedType::UINT_32},
      {LogicalType::Int(64, false), ConvertedType::UINT_64},
      {LogicalType::Int(8, true), ConvertedType::INT_8},
      {LogicalType::Int(16, true), ConvertedType::INT_16},
      {LogicalType::Int(32, true), ConvertedType::INT_32},
      {LogicalType::Int(64, true), ConvertedType::INT_64},
      {LogicalType::JSON(), ConvertedType::JSON},
      {LogicalType::BSON(), ConvertedType::BSON},
      {LogicalType::Interval(), ConvertedType::INTERVAL},
      {LogicalType::None(), ConvertedType::NONE}};

  for (const RoundTripCase& entry : round_trip_cases) {
    ConfirmConvertedTypeCompatibility(entry.logical_type, entry.converted_type);
  }

  // Special cases ...

  // One metadata object is shared by both special cases on purpose: the Unknown
  // case below starts with the values left behind by the DECIMAL case.
  schema::DecimalMetadata decimal_metadata;
  std::memset(&decimal_metadata, 0x00, sizeof(decimal_metadata));

  // DECIMAL
  std::shared_ptr<const LogicalType> source_type = LogicalType::Decimal(6, 2);
  ASSERT_TRUE(source_type->is_valid());
  ConvertedType::type legacy_type = source_type->ToConvertedType(&decimal_metadata);
  ASSERT_EQ(legacy_type, ConvertedType::DECIMAL);
  ASSERT_TRUE(decimal_metadata.isset);
  ASSERT_EQ(decimal_metadata.precision, 6);
  ASSERT_EQ(decimal_metadata.scale, 2);
  ASSERT_TRUE(source_type->is_compatible(legacy_type, decimal_metadata));
  std::shared_ptr<const LogicalType> round_tripped =
      LogicalType::FromConvertedType(legacy_type, decimal_metadata);
  ASSERT_TRUE(round_tripped->is_valid());
  ASSERT_TRUE(round_tripped->Equals(*source_type));

  // Unknown
  source_type = LogicalType::Unknown();
  ASSERT_TRUE(source_type->is_invalid());
  ASSERT_FALSE(source_type->is_valid());
  legacy_type = source_type->ToConvertedType(&decimal_metadata);
  ASSERT_EQ(legacy_type, ConvertedType::NA);
  ASSERT_FALSE(decimal_metadata.isset);
  ASSERT_TRUE(source_type->is_compatible(legacy_type, decimal_metadata));
  ASSERT_TRUE(source_type->is_compatible(legacy_type));
  round_tripped = LogicalType::FromConvertedType(legacy_type, decimal_metadata);
  ASSERT_TRUE(round_tripped->is_invalid());
  ASSERT_TRUE(round_tripped->Equals(*source_type));
}
// Verifies that a logical type without a legacy counterpart says so: it must be
// valid, satisfy the supplied type predicate, convert to ConvertedType::NONE and
// leave the decimal metadata unset.
static void ConfirmNewTypeIncompatibility(
    const std::shared_ptr<const LogicalType>& logical_type,
    std::function<bool(const std::shared_ptr<const LogicalType>&)> check_is_type) {
  ASSERT_TRUE(logical_type->is_valid())
      << logical_type->ToString() << " logical type unexpectedly is not valid";
  ASSERT_TRUE(check_is_type(logical_type))
      << logical_type->ToString() << " logical type is not expected logical type";

  schema::DecimalMetadata emitted_metadata;
  const ConvertedType::type emitted_type =
      logical_type->ToConvertedType(&emitted_metadata);

  ASSERT_EQ(emitted_type, ConvertedType::NONE)
      << logical_type->ToString()
      << " logical type converted type unexpectedly is not NONE";
  ASSERT_FALSE(emitted_metadata.isset)
      << logical_type->ToString()
      << " logical type converted decimal metadata unexpectedly is set";
}
TEST(TestLogicalTypeConstruction, NewTypeIncompatibility) {
  // Logical types introduced without a legacy counterpart must report
  // that no equivalent converted type exists.
  using TypePredicate = std::function<bool(const std::shared_ptr<const LogicalType>&)>;

  struct NewTypeCase {
    std::shared_ptr<const LogicalType> logical_type;
    TypePredicate check_is_type;
  };

  auto is_uuid = [](const std::shared_ptr<const LogicalType>& t) { return t->is_UUID(); };
  auto is_null = [](const std::shared_ptr<const LogicalType>& t) { return t->is_null(); };
  auto is_time = [](const std::shared_ptr<const LogicalType>& t) { return t->is_time(); };
  auto is_timestamp = [](const std::shared_ptr<const LogicalType>& t) {
    return t->is_timestamp();
  };

  const std::vector<NewTypeCase> new_type_cases = {
      {LogicalType::UUID(), is_uuid},
      {LogicalType::Null(), is_null},
      {LogicalType::Time(false, LogicalType::TimeUnit::MILLIS), is_time},
      {LogicalType::Time(false, LogicalType::TimeUnit::MICROS), is_time},
      {LogicalType::Time(false, LogicalType::TimeUnit::NANOS), is_time},
      {LogicalType::Time(true, LogicalType::TimeUnit::NANOS), is_time},
      {LogicalType::Timestamp(false, LogicalType::TimeUnit::NANOS), is_timestamp},
      {LogicalType::Timestamp(true, LogicalType::TimeUnit::NANOS), is_timestamp},
  };

  for (const NewTypeCase& entry : new_type_cases) {
    ConfirmNewTypeIncompatibility(entry.logical_type, entry.check_is_type);
  }
}
TEST(TestLogicalTypeConstruction, FactoryExceptions) {
  // Ensure that logical type construction catches invalid arguments.
  //
  // Each entry invokes one factory with arguments that are expected to be
  // rejected; the test requires every entry to throw.
  const std::vector<std::function<void()>> cases = {
      []() {
        TimeLogicalType::Make(true, LogicalType::TimeUnit::UNKNOWN);
      },  // Invalid TimeUnit
      []() {
        TimestampLogicalType::Make(true, LogicalType::TimeUnit::UNKNOWN);
      },                                           // Invalid TimeUnit
      []() { IntLogicalType::Make(-1, false); },   // Invalid bit width
      []() { IntLogicalType::Make(0, false); },    // Invalid bit width
      []() { IntLogicalType::Make(1, false); },    // Invalid bit width
      []() { IntLogicalType::Make(65, false); },   // Invalid bit width
      []() { DecimalLogicalType::Make(-1); },      // Invalid precision
      []() { DecimalLogicalType::Make(0); },       // Invalid precision
      []() { DecimalLogicalType::Make(0, 0); },    // Invalid precision
      []() { DecimalLogicalType::Make(10, -1); },  // Invalid scale
      []() { DecimalLogicalType::Make(10, 11); }   // Invalid scale
  };

  // Iterate by index (no per-iteration std::function copy) and report the
  // position of the offending entry, since the callables carry no description.
  for (std::size_t i = 0; i < cases.size(); ++i) {
    ASSERT_ANY_THROW(cases[i]())
        << "factory call at index " << i << " unexpectedly accepted invalid arguments";
  }
}
// Checks the general property predicates of a logical type.
//
// \param logical_type the logical type under test
// \param nested expected result of is_nested(); is_nonnested() must be the opposite
// \param serialized expected result of is_serialized()
// \param valid expected result of is_valid(); is_invalid() must be the opposite
//
// ASSERT_EQ / ASSERT_NE are used rather than ASSERT_TRUE on a comparison so that
// a failure also prints the two compared values. A failed assertion only aborts
// this helper, not the calling test.
static void ConfirmLogicalTypeProperties(
    const std::shared_ptr<const LogicalType>& logical_type, bool nested, bool serialized,
    bool valid) {
  ASSERT_EQ(logical_type->is_nested(), nested)
      << logical_type->ToString() << " logical type has incorrect nested() property";
  ASSERT_EQ(logical_type->is_serialized(), serialized)
      << logical_type->ToString() << " logical type has incorrect serialized() property";
  ASSERT_EQ(logical_type->is_valid(), valid)
      << logical_type->ToString() << " logical type has incorrect valid() property";
  // The negated predicates must disagree with the expected positive values.
  ASSERT_NE(logical_type->is_nonnested(), nested)
      << logical_type->ToString() << " logical type has incorrect nonnested() property";
  ASSERT_NE(logical_type->is_invalid(), valid)
      << logical_type->ToString() << " logical type has incorrect invalid() property";
}
TEST(TestLogicalTypeOperation, LogicalTypeProperties) {
  // Each logical type must report the expected nested / serialized / valid flags.
  struct PropertyCase {
    std::shared_ptr<const LogicalType> logical_type;
    bool nested;
    bool serialized;
    bool valid;
  };

  const std::vector<PropertyCase> property_cases = {
      {StringLogicalType::Make(), false, true, true},
      {MapLogicalType::Make(), true, true, true},
      {ListLogicalType::Make(), true, true, true},
      {EnumLogicalType::Make(), false, true, true},
      {DecimalLogicalType::Make(16, 6), false, true, true},
      {DateLogicalType::Make(), false, true, true},
      {TimeLogicalType::Make(true, LogicalType::TimeUnit::MICROS), false, true, true},
      {TimestampLogicalType::Make(true, LogicalType::TimeUnit::MICROS), false, true,
       true},
      {IntervalLogicalType::Make(), false, true, true},
      {IntLogicalType::Make(8, false), false, true, true},
      {IntLogicalType::Make(64, true), false, true, true},
      {NullLogicalType::Make(), false, true, true},
      {JSONLogicalType::Make(), false, true, true},
      {BSONLogicalType::Make(), false, true, true},
      {UUIDLogicalType::Make(), false, true, true},
      {NoLogicalType::Make(), false, false, true},
      {UnknownLogicalType::Make(), false, false, false},
  };

  for (const PropertyCase& entry : property_cases) {
    ConfirmLogicalTypeProperties(entry.logical_type, entry.nested, entry.serialized,
                                 entry.valid);
  }
}
// Number of entries in the physical_type table below.
static constexpr int PHYSICAL_TYPE_COUNT = 8;
// Table of physical types that the Confirm*PrimitiveTypeApplicability helpers
// loop over when probing LogicalType::is_applicable().
static Type::type physical_type[PHYSICAL_TYPE_COUNT] = {
Type::BOOLEAN, Type::INT32, Type::INT64, Type::INT96,
Type::FLOAT, Type::DOUBLE, Type::BYTE_ARRAY, Type::FIXED_LEN_BYTE_ARRAY};
// Verifies that the logical type is applicable to exactly one entry of the
// physical_type table, namely applicable_type, and to none of the others.
static void ConfirmSinglePrimitiveTypeApplicability(
    const std::shared_ptr<const LogicalType>& logical_type, Type::type applicable_type) {
  for (const Type::type candidate : physical_type) {
    const bool is_applicable = logical_type->is_applicable(candidate);
    if (candidate != applicable_type) {
      ASSERT_FALSE(is_applicable)
          << logical_type->ToString()
          << " logical type unexpectedly applicable to physical type "
          << TypeToString(candidate);
    } else {
      ASSERT_TRUE(is_applicable)
          << logical_type->ToString()
          << " logical type unexpectedly inapplicable to physical type "
          << TypeToString(candidate);
    }
  }
}
// Verifies that the logical type is applicable to every entry of the
// physical_type table.
static void ConfirmAnyPrimitiveTypeApplicability(
    const std::shared_ptr<const LogicalType>& logical_type) {
  for (const Type::type candidate : physical_type) {
    ASSERT_TRUE(logical_type->is_applicable(candidate))
        << logical_type->ToString()
        << " logical type unexpectedly inapplicable to physical type "
        << TypeToString(candidate);
  }
}
// Verifies that the logical type is applicable to no entry of the
// physical_type table.
static void ConfirmNoPrimitiveTypeApplicability(
    const std::shared_ptr<const LogicalType>& logical_type) {
  for (const Type::type candidate : physical_type) {
    ASSERT_FALSE(logical_type->is_applicable(candidate))
        << logical_type->ToString()
        << " logical type unexpectedly applicable to physical type "
        << TypeToString(candidate);
  }
}
TEST(TestLogicalTypeOperation, LogicalTypeApplicability) {
  // Check that each logical type correctly reports which
  // underlying primitive type(s) it can be applied to

  // Logical types expected to be applicable to exactly one physical type.
  struct ExpectedApplicability {
    std::shared_ptr<const LogicalType> logical_type;
    Type::type applicable_type;
  };
  std::vector<ExpectedApplicability> single_type_cases = {
      {LogicalType::String(), Type::BYTE_ARRAY},
      {LogicalType::Enum(), Type::BYTE_ARRAY},
      {LogicalType::Date(), Type::INT32},
      {LogicalType::Time(true, LogicalType::TimeUnit::MILLIS), Type::INT32},
      {LogicalType::Time(true, LogicalType::TimeUnit::MICROS), Type::INT64},
      {LogicalType::Time(true, LogicalType::TimeUnit::NANOS), Type::INT64},
      {LogicalType::Timestamp(true, LogicalType::TimeUnit::MILLIS), Type::INT64},
      {LogicalType::Timestamp(true, LogicalType::TimeUnit::MICROS), Type::INT64},
      {LogicalType::Timestamp(true, LogicalType::TimeUnit::NANOS), Type::INT64},
      {LogicalType::Int(8, false), Type::INT32},
      {LogicalType::Int(16, false), Type::INT32},
      {LogicalType::Int(32, false), Type::INT32},
      {LogicalType::Int(64, false), Type::INT64},
      {LogicalType::Int(8, true), Type::INT32},
      {LogicalType::Int(16, true), Type::INT32},
      {LogicalType::Int(32, true), Type::INT32},
      {LogicalType::Int(64, true), Type::INT64},
      {LogicalType::JSON(), Type::BYTE_ARRAY},
      {LogicalType::BSON(), Type::BYTE_ARRAY}};
  for (const ExpectedApplicability& c : single_type_cases) {
    ConfirmSinglePrimitiveTypeApplicability(c.logical_type, c.applicable_type);
  }

  // Logical types expected to be applicable to no physical type.
  std::vector<std::shared_ptr<const LogicalType>> no_type_cases = {LogicalType::Map(),
                                                                   LogicalType::List()};
  // Bind by const reference: copying a shared_ptr per iteration is needless.
  for (const auto& c : no_type_cases) {
    ConfirmNoPrimitiveTypeApplicability(c);
  }

  // Logical types expected to be applicable to every physical type.
  std::vector<std::shared_ptr<const LogicalType>> any_type_cases = {
      LogicalType::Null(), LogicalType::None(), LogicalType::Unknown()};
  for (const auto& c : any_type_cases) {
    ConfirmAnyPrimitiveTypeApplicability(c);
  }

  // Fixed binary, exact length cases ...
  struct InapplicableType {
    Type::type physical_type;
    int physical_length;
  };
  // (physical type, length) pairs that both Interval and UUID must reject.
  std::vector<InapplicableType> inapplicable_types = {{Type::FIXED_LEN_BYTE_ARRAY, 8},
                                                      {Type::FIXED_LEN_BYTE_ARRAY, 20},
                                                      {Type::BOOLEAN, -1},
                                                      {Type::INT32, -1},
                                                      {Type::INT64, -1},
                                                      {Type::INT96, -1},
                                                      {Type::FLOAT, -1},
                                                      {Type::DOUBLE, -1},
                                                      {Type::BYTE_ARRAY, -1}};

  std::shared_ptr<const LogicalType> logical_type;

  // Interval: accepted only for a fixed-length byte array of length 12.
  logical_type = LogicalType::Interval();
  ASSERT_TRUE(logical_type->is_applicable(Type::FIXED_LEN_BYTE_ARRAY, 12))
      << logical_type->ToString()
      << " logical type unexpectedly inapplicable to FIXED_LEN_BYTE_ARRAY of length 12";
  for (const InapplicableType& t : inapplicable_types) {
    ASSERT_FALSE(logical_type->is_applicable(t.physical_type, t.physical_length))
        << logical_type->ToString()
        << " logical type unexpectedly applicable to physical type "
        << TypeToString(t.physical_type) << " with length " << t.physical_length;
  }

  // UUID: accepted only for a fixed-length byte array of length 16.
  logical_type = LogicalType::UUID();
  ASSERT_TRUE(logical_type->is_applicable(Type::FIXED_LEN_BYTE_ARRAY, 16))
      << logical_type->ToString()
      << " logical type unexpectedly inapplicable to FIXED_LEN_BYTE_ARRAY of length 16";
  for (const InapplicableType& t : inapplicable_types) {
    ASSERT_FALSE(logical_type->is_applicable(t.physical_type, t.physical_length))
        << logical_type->ToString()
        << " logical type unexpectedly applicable to physical type "
        << TypeToString(t.physical_type) << " with length " << t.physical_length;
  }
}
TEST(TestLogicalTypeOperation, DecimalLogicalTypeApplicability) {
  // The decimal logical type must report the physical types it can annotate,
  // which depends on the requested precision.
  using DecimalPtr = std::shared_ptr<const LogicalType>;

  // INT32: precisions 1..9 accepted, 10 rejected.
  for (int32_t digits = 1; digits <= 9; ++digits) {
    const DecimalPtr decimal = DecimalLogicalType::Make(digits, 0);
    ASSERT_TRUE(decimal->is_applicable(Type::INT32))
        << decimal->ToString() << " unexpectedly inapplicable to physical type INT32";
  }
  const DecimalPtr beyond_int32 = DecimalLogicalType::Make(10, 0);
  ASSERT_FALSE(beyond_int32->is_applicable(Type::INT32))
      << beyond_int32->ToString() << " unexpectedly applicable to physical type INT32";

  // INT64: precisions 1..18 accepted, 19 rejected.
  for (int32_t digits = 1; digits <= 18; ++digits) {
    const DecimalPtr decimal = DecimalLogicalType::Make(digits, 0);
    ASSERT_TRUE(decimal->is_applicable(Type::INT64))
        << decimal->ToString() << " unexpectedly inapplicable to physical type INT64";
  }
  const DecimalPtr beyond_int64 = DecimalLogicalType::Make(19, 0);
  ASSERT_FALSE(beyond_int64->is_applicable(Type::INT64))
      << beyond_int64->ToString() << " unexpectedly applicable to physical type INT64";

  // BYTE_ARRAY: precisions 1..36 are all probed and must be accepted.
  for (int32_t digits = 1; digits <= 36; ++digits) {
    const DecimalPtr decimal = DecimalLogicalType::Make(digits, 0);
    ASSERT_TRUE(decimal->is_applicable(Type::BYTE_ARRAY))
        << decimal->ToString()
        << " unexpectedly inapplicable to physical type BYTE_ARRAY";
  }

  // FIXED_LEN_BYTE_ARRAY: for each byte length, every precision up to the limit
  // is accepted and the first precision past the limit is rejected.
  struct FixedLengthLimit {
    int32_t physical_length;
    int32_t precision_limit;
  };
  const std::vector<FixedLengthLimit> fixed_length_limits = {
      {1, 2}, {2, 4}, {3, 6}, {4, 9}, {8, 18}, {10, 23}, {16, 38}, {20, 47}, {32, 76}};

  for (const FixedLengthLimit& limit : fixed_length_limits) {
    for (int32_t digits = 1; digits <= limit.precision_limit; ++digits) {
      const DecimalPtr decimal = DecimalLogicalType::Make(digits, 0);
      ASSERT_TRUE(
          decimal->is_applicable(Type::FIXED_LEN_BYTE_ARRAY, limit.physical_length))
          << decimal->ToString()
          << " unexpectedly inapplicable to physical type FIXED_LEN_BYTE_ARRAY with length "
          << limit.physical_length;
    }

    const int32_t first_rejected = limit.precision_limit + 1;
    const DecimalPtr oversized = DecimalLogicalType::Make(first_rejected, 0);
    ASSERT_FALSE(
        oversized->is_applicable(Type::FIXED_LEN_BYTE_ARRAY, limit.physical_length))
        << oversized->ToString()
        << " unexpectedly applicable to physical type FIXED_LEN_BYTE_ARRAY with length "
        << limit.physical_length;
  }

  // Physical types that are rejected in these probes.
  ASSERT_FALSE(DecimalLogicalType::Make(16, 6)->is_applicable(Type::BOOLEAN));
  ASSERT_FALSE(DecimalLogicalType::Make(16, 6)->is_applicable(Type::FLOAT));
  ASSERT_FALSE(DecimalLogicalType::Make(16, 6)->is_applicable(Type::DOUBLE));
}
TEST(TestLogicalTypeOperation, LogicalTypeRepresentation) {
  // Each logical type must render the expected ToString() text and the expected
  // ToJSON() document.
  struct Rendering {
    std::shared_ptr<const LogicalType> logical_type;
    const char* text;
    const char* json;
  };

  const std::vector<Rendering> renderings = {
      {LogicalType::Unknown(), "Unknown", R"({"Type": "Unknown"})"},
      {LogicalType::String(), "String", R"({"Type": "String"})"},
      {LogicalType::Map(), "Map", R"({"Type": "Map"})"},
      {LogicalType::List(), "List", R"({"Type": "List"})"},
      {LogicalType::Enum(), "Enum", R"({"Type": "Enum"})"},
      {LogicalType::Decimal(10, 4), "Decimal(precision=10, scale=4)",
       R"({"Type": "Decimal", "precision": 10, "scale": 4})"},
      {LogicalType::Decimal(10), "Decimal(precision=10, scale=0)",
       R"({"Type": "Decimal", "precision": 10, "scale": 0})"},
      {LogicalType::Date(), "Date", R"({"Type": "Date"})"},
      {LogicalType::Time(true, LogicalType::TimeUnit::MILLIS),
       "Time(isAdjustedToUTC=true, timeUnit=milliseconds)",
       R"({"Type": "Time", "isAdjustedToUTC": true, "timeUnit": "milliseconds"})"},
      {LogicalType::Time(true, LogicalType::TimeUnit::MICROS),
       "Time(isAdjustedToUTC=true, timeUnit=microseconds)",
       R"({"Type": "Time", "isAdjustedToUTC": true, "timeUnit": "microseconds"})"},
      {LogicalType::Time(true, LogicalType::TimeUnit::NANOS),
       "Time(isAdjustedToUTC=true, timeUnit=nanoseconds)",
       R"({"Type": "Time", "isAdjustedToUTC": true, "timeUnit": "nanoseconds"})"},
      {LogicalType::Time(false, LogicalType::TimeUnit::MILLIS),
       "Time(isAdjustedToUTC=false, timeUnit=milliseconds)",
       R"({"Type": "Time", "isAdjustedToUTC": false, "timeUnit": "milliseconds"})"},
      {LogicalType::Time(false, LogicalType::TimeUnit::MICROS),
       "Time(isAdjustedToUTC=false, timeUnit=microseconds)",
       R"({"Type": "Time", "isAdjustedToUTC": false, "timeUnit": "microseconds"})"},
      {LogicalType::Time(false, LogicalType::TimeUnit::NANOS),
       "Time(isAdjustedToUTC=false, timeUnit=nanoseconds)",
       R"({"Type": "Time", "isAdjustedToUTC": false, "timeUnit": "nanoseconds"})"},
      {LogicalType::Timestamp(true, LogicalType::TimeUnit::MILLIS),
       "Timestamp(isAdjustedToUTC=true, timeUnit=milliseconds, "
       "is_from_converted_type=false, force_set_converted_type=false)",
       R"({"Type": "Timestamp", "isAdjustedToUTC": true, "timeUnit": "milliseconds", )"
       R"("is_from_converted_type": false, "force_set_converted_type": false})"},
      {LogicalType::Timestamp(true, LogicalType::TimeUnit::MICROS),
       "Timestamp(isAdjustedToUTC=true, timeUnit=microseconds, "
       "is_from_converted_type=false, force_set_converted_type=false)",
       R"({"Type": "Timestamp", "isAdjustedToUTC": true, "timeUnit": "microseconds", )"
       R"("is_from_converted_type": false, "force_set_converted_type": false})"},
      {LogicalType::Timestamp(true, LogicalType::TimeUnit::NANOS),
       "Timestamp(isAdjustedToUTC=true, timeUnit=nanoseconds, "
       "is_from_converted_type=false, force_set_converted_type=false)",
       R"({"Type": "Timestamp", "isAdjustedToUTC": true, "timeUnit": "nanoseconds", )"
       R"("is_from_converted_type": false, "force_set_converted_type": false})"},
      {LogicalType::Timestamp(false, LogicalType::TimeUnit::MILLIS, true, true),
       "Timestamp(isAdjustedToUTC=false, timeUnit=milliseconds, "
       "is_from_converted_type=true, force_set_converted_type=true)",
       R"({"Type": "Timestamp", "isAdjustedToUTC": false, "timeUnit": "milliseconds", )"
       R"("is_from_converted_type": true, "force_set_converted_type": true})"},
      {LogicalType::Timestamp(false, LogicalType::TimeUnit::MICROS),
       "Timestamp(isAdjustedToUTC=false, timeUnit=microseconds, "
       "is_from_converted_type=false, force_set_converted_type=false)",
       R"({"Type": "Timestamp", "isAdjustedToUTC": false, "timeUnit": "microseconds", )"
       R"("is_from_converted_type": false, "force_set_converted_type": false})"},
      {LogicalType::Timestamp(false, LogicalType::TimeUnit::NANOS),
       "Timestamp(isAdjustedToUTC=false, timeUnit=nanoseconds, "
       "is_from_converted_type=false, force_set_converted_type=false)",
       R"({"Type": "Timestamp", "isAdjustedToUTC": false, "timeUnit": "nanoseconds", )"
       R"("is_from_converted_type": false, "force_set_converted_type": false})"},
      {LogicalType::Interval(), "Interval", R"({"Type": "Interval"})"},
      {LogicalType::Int(8, false), "Int(bitWidth=8, isSigned=false)",
       R"({"Type": "Int", "bitWidth": 8, "isSigned": false})"},
      {LogicalType::Int(16, false), "Int(bitWidth=16, isSigned=false)",
       R"({"Type": "Int", "bitWidth": 16, "isSigned": false})"},
      {LogicalType::Int(32, false), "Int(bitWidth=32, isSigned=false)",
       R"({"Type": "Int", "bitWidth": 32, "isSigned": false})"},
      {LogicalType::Int(64, false), "Int(bitWidth=64, isSigned=false)",
       R"({"Type": "Int", "bitWidth": 64, "isSigned": false})"},
      {LogicalType::Int(8, true), "Int(bitWidth=8, isSigned=true)",
       R"({"Type": "Int", "bitWidth": 8, "isSigned": true})"},
      {LogicalType::Int(16, true), "Int(bitWidth=16, isSigned=true)",
       R"({"Type": "Int", "bitWidth": 16, "isSigned": true})"},
      {LogicalType::Int(32, true), "Int(bitWidth=32, isSigned=true)",
       R"({"Type": "Int", "bitWidth": 32, "isSigned": true})"},
      {LogicalType::Int(64, true), "Int(bitWidth=64, isSigned=true)",
       R"({"Type": "Int", "bitWidth": 64, "isSigned": true})"},
      {LogicalType::Null(), "Null", R"({"Type": "Null"})"},
      {LogicalType::JSON(), "JSON", R"({"Type": "JSON"})"},
      {LogicalType::BSON(), "BSON", R"({"Type": "BSON"})"},
      {LogicalType::UUID(), "UUID", R"({"Type": "UUID"})"},
      {LogicalType::None(), "None", R"({"Type": "None"})"},
  };

  for (const Rendering& expected : renderings) {
    // Keep each rendered string alive in a named local while it is compared.
    const std::string actual_text = expected.logical_type->ToString();
    ASSERT_STREQ(actual_text.c_str(), expected.text);
    const std::string actual_json = expected.logical_type->ToJSON();
    ASSERT_STREQ(actual_json.c_str(), expected.json);
  }
}
TEST(TestLogicalTypeOperation, LogicalTypeSortOrder) {
  // Each logical type must report the expected sort order.
  struct SortOrderCase {
    std::shared_ptr<const LogicalType> logical_type;
    SortOrder::type sort_order;
  };

  const std::vector<SortOrderCase> sort_order_cases = {
      {LogicalType::Unknown(), SortOrder::UNKNOWN},
      {LogicalType::String(), SortOrder::UNSIGNED},
      {LogicalType::Map(), SortOrder::UNKNOWN},
      {LogicalType::List(), SortOrder::UNKNOWN},
      {LogicalType::Enum(), SortOrder::UNSIGNED},
      {LogicalType::Decimal(8, 2), SortOrder::SIGNED},
      {LogicalType::Date(), SortOrder::SIGNED},
      {LogicalType::Time(true, LogicalType::TimeUnit::MILLIS), SortOrder::SIGNED},
      {LogicalType::Time(true, LogicalType::TimeUnit::MICROS), SortOrder::SIGNED},
      {LogicalType::Time(true, LogicalType::TimeUnit::NANOS), SortOrder::SIGNED},
      {LogicalType::Time(false, LogicalType::TimeUnit::MILLIS), SortOrder::SIGNED},
      {LogicalType::Time(false, LogicalType::TimeUnit::MICROS), SortOrder::SIGNED},
      {LogicalType::Time(false, LogicalType::TimeUnit::NANOS), SortOrder::SIGNED},
      {LogicalType::Timestamp(true, LogicalType::TimeUnit::MILLIS), SortOrder::SIGNED},
      {LogicalType::Timestamp(true, LogicalType::TimeUnit::MICROS), SortOrder::SIGNED},
      {LogicalType::Timestamp(true, LogicalType::TimeUnit::NANOS), SortOrder::SIGNED},
      {LogicalType::Timestamp(false, LogicalType::TimeUnit::MILLIS), SortOrder::SIGNED},
      {LogicalType::Timestamp(false, LogicalType::TimeUnit::MICROS), SortOrder::SIGNED},
      {LogicalType::Timestamp(false, LogicalType::TimeUnit::NANOS), SortOrder::SIGNED},
      {LogicalType::Interval(), SortOrder::UNKNOWN},
      {LogicalType::Int(8, false), SortOrder::UNSIGNED},
      {LogicalType::Int(16, false), SortOrder::UNSIGNED},
      {LogicalType::Int(32, false), SortOrder::UNSIGNED},
      {LogicalType::Int(64, false), SortOrder::UNSIGNED},
      {LogicalType::Int(8, true), SortOrder::SIGNED},
      {LogicalType::Int(16, true), SortOrder::SIGNED},
      {LogicalType::Int(32, true), SortOrder::SIGNED},
      {LogicalType::Int(64, true), SortOrder::SIGNED},
      {LogicalType::Null(), SortOrder::UNKNOWN},
      {LogicalType::JSON(), SortOrder::UNSIGNED},
      {LogicalType::BSON(), SortOrder::UNSIGNED},
      {LogicalType::UUID(), SortOrder::UNSIGNED},
      {LogicalType::None(), SortOrder::UNKNOWN}};

  for (const SortOrderCase& entry : sort_order_cases) {
    ASSERT_EQ(entry.logical_type->sort_order(), entry.sort_order)
        << entry.logical_type->ToString() << " logical type has incorrect sort order";
  }
}
// Builds the same REQUIRED primitive node twice, once through the converted-type
// overload of PrimitiveNode::Make() and once through the logical-type overload,
// and requires the two nodes to compare equal.
static void ConfirmPrimitiveNodeFactoryEquivalence(
    const std::shared_ptr<const LogicalType>& logical_type,
    ConvertedType::type converted_type, Type::type physical_type, int physical_length,
    int precision, int scale) {
  const std::string node_name = "something";

  const NodePtr via_converted_type =
      PrimitiveNode::Make(node_name, Repetition::REQUIRED, physical_type, converted_type,
                          physical_length, precision, scale);
  const NodePtr via_logical_type = PrimitiveNode::Make(
      node_name, Repetition::REQUIRED, logical_type, physical_type, physical_length);

  ASSERT_TRUE(via_converted_type->Equals(via_logical_type.get()))
      << "Primitive node constructed with converted type "
      << ConvertedTypeToString(converted_type)
      << " unexpectedly not equivalent to primitive node constructed with logical type "
      << logical_type->ToString();
}
// Builds the same childless OPTIONAL group node twice, once from the converted
// type and once from the logical type, and requires the two nodes to compare
// equal.
static void ConfirmGroupNodeFactoryEquivalence(
    std::string name, const std::shared_ptr<const LogicalType>& logical_type,
    ConvertedType::type converted_type) {
  const NodePtr via_converted_type =
      GroupNode::Make(name, Repetition::OPTIONAL, {}, converted_type);
  const NodePtr via_logical_type =
      GroupNode::Make(name, Repetition::OPTIONAL, {}, logical_type);

  ASSERT_TRUE(via_converted_type->Equals(via_logical_type.get()))
      << "Group node constructed with converted type "
      << ConvertedTypeToString(converted_type)
      << " unexpectedly not equivalent to group node constructed with logical type "
      << logical_type->ToString();
}
TEST(TestSchemaNodeCreation, FactoryEquivalence) {
  // A node built from a converted type and a node built from the equivalent
  // logical type must be interchangeable.

  // Primitive nodes ...
  struct PrimitiveCase {
    std::shared_ptr<const LogicalType> logical_type;
    ConvertedType::type converted_type;
    Type::type physical_type;
    int physical_length;
    int precision;
    int scale;
  };

  const std::vector<PrimitiveCase> primitive_cases = {
      {LogicalType::String(), ConvertedType::UTF8, Type::BYTE_ARRAY, -1, -1, -1},
      {LogicalType::Enum(), ConvertedType::ENUM, Type::BYTE_ARRAY, -1, -1, -1},
      {LogicalType::Decimal(16, 6), ConvertedType::DECIMAL, Type::INT64, -1, 16, 6},
      {LogicalType::Date(), ConvertedType::DATE, Type::INT32, -1, -1, -1},
      {LogicalType::Time(true, LogicalType::TimeUnit::MILLIS), ConvertedType::TIME_MILLIS,
       Type::INT32, -1, -1, -1},
      {LogicalType::Time(true, LogicalType::TimeUnit::MICROS), ConvertedType::TIME_MICROS,
       Type::INT64, -1, -1, -1},
      {LogicalType::Timestamp(true, LogicalType::TimeUnit::MILLIS),
       ConvertedType::TIMESTAMP_MILLIS, Type::INT64, -1, -1, -1},
      {LogicalType::Timestamp(true, LogicalType::TimeUnit::MICROS),
       ConvertedType::TIMESTAMP_MICROS, Type::INT64, -1, -1, -1},
      {LogicalType::Interval(), ConvertedType::INTERVAL, Type::FIXED_LEN_BYTE_ARRAY, 12,
       -1, -1},
      {LogicalType::Int(8, false), ConvertedType::UINT_8, Type::INT32, -1, -1, -1},
      {LogicalType::Int(8, true), ConvertedType::INT_8, Type::INT32, -1, -1, -1},
      {LogicalType::Int(16, false), ConvertedType::UINT_16, Type::INT32, -1, -1, -1},
      {LogicalType::Int(16, true), ConvertedType::INT_16, Type::INT32, -1, -1, -1},
      {LogicalType::Int(32, false), ConvertedType::UINT_32, Type::INT32, -1, -1, -1},
      {LogicalType::Int(32, true), ConvertedType::INT_32, Type::INT32, -1, -1, -1},
      {LogicalType::Int(64, false), ConvertedType::UINT_64, Type::INT64, -1, -1, -1},
      {LogicalType::Int(64, true), ConvertedType::INT_64, Type::INT64, -1, -1, -1},
      {LogicalType::JSON(), ConvertedType::JSON, Type::BYTE_ARRAY, -1, -1, -1},
      {LogicalType::BSON(), ConvertedType::BSON, Type::BYTE_ARRAY, -1, -1, -1},
      {LogicalType::None(), ConvertedType::NONE, Type::INT64, -1, -1, -1}};

  for (const PrimitiveCase& entry : primitive_cases) {
    ConfirmPrimitiveNodeFactoryEquivalence(entry.logical_type, entry.converted_type,
                                           entry.physical_type, entry.physical_length,
                                           entry.precision, entry.scale);
  }

  // Group nodes ...
  ConfirmGroupNodeFactoryEquivalence("map", LogicalType::Map(), ConvertedType::MAP);
  ConfirmGroupNodeFactoryEquivalence("list", LogicalType::List(), ConvertedType::LIST);
}
TEST(TestSchemaNodeCreation, FactoryExceptions) {
// Ensure that the Node factory method that accepts a logical type refuses to create
// an object if compatibility conditions are not met
//
// The test has three parts:
//  1. factory calls with incompatible arguments, each of which must throw;
//  2. factory calls with a null logical type, which must succeed and yield a
//     node reporting is_none() / ConvertedType::NONE;
//  3. nodes serialized with ToParquet() whose Thrift intermediary is then
//     corrupted, so that FromParquet() must throw.
// Nested logical type on non-group node ...
ASSERT_ANY_THROW(PrimitiveNode::Make("map", Repetition::REQUIRED,
MapLogicalType::Make(), Type::INT64));
// Incompatible primitive type ...
ASSERT_ANY_THROW(PrimitiveNode::Make("string", Repetition::REQUIRED,
StringLogicalType::Make(), Type::BOOLEAN));
// Incompatible primitive length ...
ASSERT_ANY_THROW(PrimitiveNode::Make("interval", Repetition::REQUIRED,
IntervalLogicalType::Make(),
Type::FIXED_LEN_BYTE_ARRAY, 11));
// Primitive too small for given precision ...
ASSERT_ANY_THROW(PrimitiveNode::Make("decimal", Repetition::REQUIRED,
DecimalLogicalType::Make(16, 6), Type::INT32));
// Incompatible primitive length ...
ASSERT_ANY_THROW(PrimitiveNode::Make("uuid", Repetition::REQUIRED,
UUIDLogicalType::Make(),
Type::FIXED_LEN_BYTE_ARRAY, 64));
// Non-positive length argument for fixed length binary ...
ASSERT_ANY_THROW(PrimitiveNode::Make("negative_length", Repetition::REQUIRED,
NoLogicalType::Make(), Type::FIXED_LEN_BYTE_ARRAY,
-16));
// Non-positive length argument for fixed length binary ...
ASSERT_ANY_THROW(PrimitiveNode::Make("zero_length", Repetition::REQUIRED,
NoLogicalType::Make(), Type::FIXED_LEN_BYTE_ARRAY,
0));
// Non-nested logical type on group node ...
ASSERT_ANY_THROW(
GroupNode::Make("list", Repetition::REPEATED, {}, JSONLogicalType::Make()));
// nullptr logical type arguments convert to NoLogicalType/ConvertedType::NONE
// ("empty" is a default-constructed, i.e. null, shared_ptr.)
std::shared_ptr<const LogicalType> empty;
NodePtr node;
ASSERT_NO_THROW(
node = PrimitiveNode::Make("value", Repetition::REQUIRED, empty, Type::DOUBLE));
ASSERT_TRUE(node->logical_type()->is_none());
ASSERT_EQ(node->converted_type(), ConvertedType::NONE);
ASSERT_NO_THROW(node = GroupNode::Make("items", Repetition::REPEATED, {}, empty));
ASSERT_TRUE(node->logical_type()->is_none());
ASSERT_EQ(node->converted_type(), ConvertedType::NONE);
// Invalid ConvertedType in deserialized element ...
// NOTE(review): the field corrupted below is logicalType (its STRING member is
// flagged as unset), not the converted type -- the heading above may be stale.
node = PrimitiveNode::Make("string", Repetition::REQUIRED, StringLogicalType::Make(),
Type::BYTE_ARRAY);
// Sanity-check the freshly built node before serializing it.
ASSERT_EQ(node->logical_type()->type(), LogicalType::Type::STRING);
ASSERT_TRUE(node->logical_type()->is_valid());
ASSERT_TRUE(node->logical_type()->is_serialized());
format::SchemaElement string_intermediary;
node->ToParquet(&string_intermediary);
// ... corrupt the Thrift intermediary ....
string_intermediary.logicalType.__isset.STRING = false;
ASSERT_ANY_THROW(node = PrimitiveNode::FromParquet(&string_intermediary, 1));
// Invalid TimeUnit in deserialized TimeLogicalType ...
node = PrimitiveNode::Make("time", Repetition::REQUIRED,
TimeLogicalType::Make(true, LogicalType::TimeUnit::NANOS),
Type::INT64);
format::SchemaElement time_intermediary;
node->ToParquet(&time_intermediary);
// ... corrupt the Thrift intermediary ....
// (clears the NANOS flag of the serialized time unit)
time_intermediary.logicalType.TIME.unit.__isset.NANOS = false;
ASSERT_ANY_THROW(PrimitiveNode::FromParquet(&time_intermediary, 1));
// Invalid TimeUnit in deserialized TimestampLogicalType ...
node = PrimitiveNode::Make(
"timestamp", Repetition::REQUIRED,
TimestampLogicalType::Make(true, LogicalType::TimeUnit::NANOS), Type::INT64);
format::SchemaElement timestamp_intermediary;
node->ToParquet(&timestamp_intermediary);
// ... corrupt the Thrift intermediary ....
// (clears the NANOS flag of the serialized timestamp unit)
timestamp_intermediary.logicalType.TIMESTAMP.unit.__isset.NANOS = false;
ASSERT_ANY_THROW(PrimitiveNode::FromParquet(&timestamp_intermediary, 1));
}
// One test case for TestSchemaElementConstruction::Reconstruct(): the inputs
// used to build a logical-type-annotated primitive node, followed by the
// expectations checked against the Thrift element that node serializes to.
// Test cases brace-initialize this struct positionally, so member order matters.
struct SchemaElementConstructionArguments {
// Node name; also compared against the name of the serialized element
std::string name;
// Logical type annotation handed to PrimitiveNode::Make()
std::shared_ptr<const LogicalType> logical_type;
// Physical type handed to PrimitiveNode::Make()
Type::type physical_type;
// Physical length handed to PrimitiveNode::Make() (the cases in this file
// pass -1 for types that are not FIXED_LEN_BYTE_ARRAY)
int physical_length;
// Whether the serialized element is expected to have converted_type set
bool expect_converted_type;
// Converted type value expected when expect_converted_type is true
ConvertedType::type converted_type;
// Whether the serialized element is expected to have logicalType set
bool expect_logicalType;
// Predicate evaluated when expect_logicalType is true; must return true
// if the logicalType member of the serialized element is populated correctly
std::function<bool()> check_logicalType;
};
// One test case for TestSchemaElementConstruction::LegacyReconstruct(): the
// node is built from a physical type and a converted type (no logical type
// argument), followed by the expectations checked against the serialized
// Thrift element. Test cases brace-initialize this struct positionally, so
// member order matters.
struct LegacySchemaElementConstructionArguments {
// Node name; also compared against the name of the serialized element
std::string name;
// Physical type handed to PrimitiveNode::Make()
Type::type physical_type;
// Physical length handed to PrimitiveNode::Make()
int physical_length;
// Whether the serialized element is expected to have converted_type set
bool expect_converted_type;
// Converted type used to build the node, and also the value expected in
// the serialized element when expect_converted_type is true
ConvertedType::type converted_type;
// Whether the serialized element is expected to have logicalType set
bool expect_logicalType;
// Predicate evaluated when expect_logicalType is true; must return true
// if the logicalType member of the serialized element is populated correctly
std::function<bool()> check_logicalType;
};
class TestSchemaElementConstruction : public ::testing::Test {
public:
TestSchemaElementConstruction* Reconstruct(
const SchemaElementConstructionArguments& c) {
// Make node, create serializable Thrift object from it ...
node_ = PrimitiveNode::Make(c.name, Repetition::REQUIRED, c.logical_type,
c.physical_type, c.physical_length);
element_.reset(new format::SchemaElement);
node_->ToParquet(element_.get());
// ... then set aside some values for later inspection.
name_ = c.name;
expect_converted_type_ = c.expect_converted_type;
converted_type_ = c.converted_type;
expect_logicalType_ = c.expect_logicalType;
check_logicalType_ = c.check_logicalType;
return this;
}
TestSchemaElementConstruction* LegacyReconstruct(
const LegacySchemaElementConstructionArguments& c) {
// Make node, create serializable Thrift object from it ...
node_ = PrimitiveNode::Make(c.name, Repetition::REQUIRED, c.physical_type,
c.converted_type, c.physical_length);
element_.reset(new format::SchemaElement);
node_->ToParquet(element_.get());
// ... then set aside some values for later inspection.
name_ = c.name;
expect_converted_type_ = c.expect_converted_type;
converted_type_ = c.converted_type;
expect_logicalType_ = c.expect_logicalType;
check_logicalType_ = c.check_logicalType;
return this;
}
void Inspect() {
ASSERT_EQ(element_->name, name_);
if (expect_converted_type_) {
ASSERT_TRUE(element_->__isset.converted_type)
<< node_->logical_type()->ToString()
<< " logical type unexpectedly failed to generate a converted type in the "
"Thrift "
"intermediate object";
ASSERT_EQ(element_->converted_type, ToThrift(converted_type_))
<< node_->logical_type()->ToString()
<< " logical type unexpectedly failed to generate correct converted type in "
"the "
"Thrift intermediate object";
} else {
ASSERT_FALSE(element_->__isset.converted_type)
<< node_->logical_type()->ToString()
<< " logical type unexpectedly generated a converted type in the Thrift "
"intermediate object";
}
if (expect_logicalType_) {
ASSERT_TRUE(element_->__isset.logicalType)
<< node_->logical_type()->ToString()
<< " logical type unexpectedly failed to genverate a logicalType in the Thrift "
"intermediate object";
ASSERT_TRUE(check_logicalType_())
<< node_->logical_type()->ToString()
<< " logical type generated incorrect logicalType "
"settings in the Thrift intermediate object";
} else {
ASSERT_FALSE(element_->__isset.logicalType)
<< node_->logical_type()->ToString()
<< " logical type unexpectedly generated a logicalType in the Thrift "
"intermediate object";
}
return;
}
protected:
NodePtr node_;
std::unique_ptr<format::SchemaElement> element_;
std::string name_;
bool expect_converted_type_;
ConvertedType::type converted_type_; // expected converted type in Thrift object
bool expect_logicalType_;
std::function<bool()> check_logicalType_; // specialized (by logical type)
// logicalType check for Thrift object
};
/*
* The Test*SchemaElementConstruction suites confirm that the logical type
* and converted type members of the Thrift intermediate message object
* (format::SchemaElement) that is created upon serialization of an annotated
* schema node are correctly populated.
*/
TEST_F(TestSchemaElementConstruction, SimpleCases) {
  // Stand-in predicate for cases that do not expect a logicalType to be set
  auto check_nothing = []() { return true; };

  // Nodes built from a logical type annotation
  std::vector<SchemaElementConstructionArguments> annotated_cases = {
      {"string", LogicalType::String(), Type::BYTE_ARRAY, -1, true, ConvertedType::UTF8,
       true, [this]() { return element_->logicalType.__isset.STRING; }},
      {"enum", LogicalType::Enum(), Type::BYTE_ARRAY, -1, true, ConvertedType::ENUM,
       true, [this]() { return element_->logicalType.__isset.ENUM; }},
      {"date", LogicalType::Date(), Type::INT32, -1, true, ConvertedType::DATE, true,
       [this]() { return element_->logicalType.__isset.DATE; }},
      {"interval", LogicalType::Interval(), Type::FIXED_LEN_BYTE_ARRAY, 12, true,
       ConvertedType::INTERVAL, false, check_nothing},
      {"null", LogicalType::Null(), Type::DOUBLE, -1, false, ConvertedType::NA, true,
       [this]() { return element_->logicalType.__isset.UNKNOWN; }},
      {"json", LogicalType::JSON(), Type::BYTE_ARRAY, -1, true, ConvertedType::JSON,
       true, [this]() { return element_->logicalType.__isset.JSON; }},
      {"bson", LogicalType::BSON(), Type::BYTE_ARRAY, -1, true, ConvertedType::BSON,
       true, [this]() { return element_->logicalType.__isset.BSON; }},
      {"uuid", LogicalType::UUID(), Type::FIXED_LEN_BYTE_ARRAY, 16, false,
       ConvertedType::NA, true,
       [this]() { return element_->logicalType.__isset.UUID; }},
      {"none", LogicalType::None(), Type::INT64, -1, false, ConvertedType::NA, false,
       check_nothing},
      {"unknown", LogicalType::Unknown(), Type::INT64, -1, true, ConvertedType::NA,
       false, check_nothing}};
  for (const auto& annotated : annotated_cases) {
    Reconstruct(annotated)->Inspect();
  }

  // Nodes built through the converted-type (legacy) factory overload
  std::vector<LegacySchemaElementConstructionArguments> converted_cases = {
      {"timestamp_ms", Type::INT64, -1, true, ConvertedType::TIMESTAMP_MILLIS, false,
       check_nothing},
      {"timestamp_us", Type::INT64, -1, true, ConvertedType::TIMESTAMP_MICROS, false,
       check_nothing},
  };
  for (const auto& legacy : converted_cases) {
    LegacyReconstruct(legacy)->Inspect();
  }
}
// Extends the base fixture with checks on the precision and scale that a
// decimal logical type writes into the Thrift element.
class TestDecimalSchemaElementConstruction : public TestSchemaElementConstruction {
 public:
  // Runs the base reconstruction, then remembers the precision and scale of
  // the decimal logical type for Inspect(). Returns this for chaining.
  TestDecimalSchemaElementConstruction* Reconstruct(
      const SchemaElementConstructionArguments& c) {
    TestSchemaElementConstruction::Reconstruct(c);
    const auto& decimal = checked_cast<const DecimalLogicalType&>(*c.logical_type);
    scale_ = decimal.scale();
    precision_ = decimal.precision();
    return this;
  }

  // Base checks first, then precision/scale in both the element itself and
  // its logicalType.DECIMAL member.
  void Inspect() {
    TestSchemaElementConstruction::Inspect();
    ASSERT_EQ(element_->precision, precision_);
    ASSERT_EQ(element_->scale, scale_);
    ASSERT_EQ(element_->logicalType.DECIMAL.precision, precision_);
    ASSERT_EQ(element_->logicalType.DECIMAL.scale, scale_);
  }

 protected:
  int32_t precision_;  // expected decimal precision
  int32_t scale_;      // expected decimal scale
};
TEST_F(TestDecimalSchemaElementConstruction, DecimalCases) {
  // Every decimal case expects the DECIMAL member of logicalType to be set
  auto decimal_is_set = [this]() { return element_->logicalType.__isset.DECIMAL; };

  std::vector<SchemaElementConstructionArguments> decimal_cases = {
      {"decimal", LogicalType::Decimal(16, 6), Type::INT64, -1, true,
       ConvertedType::DECIMAL, true, decimal_is_set},
      {"decimal", LogicalType::Decimal(1, 0), Type::INT32, -1, true,
       ConvertedType::DECIMAL, true, decimal_is_set},
      {"decimal", LogicalType::Decimal(10), Type::INT64, -1, true,
       ConvertedType::DECIMAL, true, decimal_is_set},
      {"decimal", LogicalType::Decimal(11, 11), Type::INT64, -1, true,
       ConvertedType::DECIMAL, true, decimal_is_set},
  };

  for (const auto& decimal_case : decimal_cases) {
    Reconstruct(decimal_case)->Inspect();
  }
}
// Extends the base fixture for time and timestamp logical types. T in
// Reconstruct<T>() is the logical type class to downcast to; Inspect<T>() is
// only usable through its explicit specializations.
class TestTemporalSchemaElementConstruction : public TestSchemaElementConstruction {
 public:
  // Runs the base reconstruction, then remembers the UTC-adjustment flag and
  // time unit of the logical type for Inspect(). Returns this for chaining.
  template <typename T>
  TestTemporalSchemaElementConstruction* Reconstruct(
      const SchemaElementConstructionArguments& c) {
    TestSchemaElementConstruction::Reconstruct(c);
    const auto& temporal = checked_cast<const T&>(*c.logical_type);
    unit_ = temporal.time_unit();
    adjusted_ = temporal.is_adjusted_to_utc();
    return this;
  }

  // Primary template: reached only for a type without a specialization,
  // which is a mistake in the test itself.
  template <typename T>
  void Inspect() {
    FAIL() << "Invalid typename specified in test suite";
  }

 protected:
  bool adjusted_;                     // expected isAdjustedToUTC flag
  LogicalType::TimeUnit::unit unit_;  // expected time unit
};
// Inspection for TIME: base checks, then the UTC-adjustment flag and the
// unit member of logicalType.TIME that matches the expected time unit.
template <>
void TestTemporalSchemaElementConstruction::Inspect<format::TimeType>() {
  TestSchemaElementConstruction::Inspect();
  ASSERT_EQ(element_->logicalType.TIME.isAdjustedToUTC, adjusted_);

  if (unit_ == LogicalType::TimeUnit::MILLIS) {
    ASSERT_TRUE(element_->logicalType.TIME.unit.__isset.MILLIS);
  } else if (unit_ == LogicalType::TimeUnit::MICROS) {
    ASSERT_TRUE(element_->logicalType.TIME.unit.__isset.MICROS);
  } else if (unit_ == LogicalType::TimeUnit::NANOS) {
    ASSERT_TRUE(element_->logicalType.TIME.unit.__isset.NANOS);
  } else {
    // UNKNOWN or any other value means the test case itself is wrong
    FAIL() << "Invalid time unit in test case";
  }
}
// Inspection for TIMESTAMP: base checks, then the UTC-adjustment flag and the
// unit member of logicalType.TIMESTAMP that matches the expected time unit.
template <>
void TestTemporalSchemaElementConstruction::Inspect<format::TimestampType>() {
  TestSchemaElementConstruction::Inspect();
  ASSERT_EQ(element_->logicalType.TIMESTAMP.isAdjustedToUTC, adjusted_);

  if (unit_ == LogicalType::TimeUnit::MILLIS) {
    ASSERT_TRUE(element_->logicalType.TIMESTAMP.unit.__isset.MILLIS);
  } else if (unit_ == LogicalType::TimeUnit::MICROS) {
    ASSERT_TRUE(element_->logicalType.TIMESTAMP.unit.__isset.MICROS);
  } else if (unit_ == LogicalType::TimeUnit::NANOS) {
    ASSERT_TRUE(element_->logicalType.TIMESTAMP.unit.__isset.NANOS);
  } else {
    // UNKNOWN or any other value means the test case itself is wrong
    FAIL() << "Invalid time unit in test case";
  }
}
// Checks converted type / logicalType serialization for TIME and TIMESTAMP
// annotations across every combination of UTC adjustment and time unit, plus
// the "force_set_converted_type" variants of non-UTC-adjusted timestamps.
TEST_F(TestTemporalSchemaElementConstruction, TemporalCases) {
  auto check_TIME = [this]() { return element_->logicalType.__isset.TIME; };

  std::vector<SchemaElementConstructionArguments> time_cases = {
      {"time_T_ms", LogicalType::Time(true, LogicalType::TimeUnit::MILLIS), Type::INT32,
       -1, true, ConvertedType::TIME_MILLIS, true, check_TIME},
      {"time_F_ms", LogicalType::Time(false, LogicalType::TimeUnit::MILLIS), Type::INT32,
       -1, false, ConvertedType::NA, true, check_TIME},
      {"time_T_us", LogicalType::Time(true, LogicalType::TimeUnit::MICROS), Type::INT64,
       -1, true, ConvertedType::TIME_MICROS, true, check_TIME},
      {"time_F_us", LogicalType::Time(false, LogicalType::TimeUnit::MICROS), Type::INT64,
       -1, false, ConvertedType::NA, true, check_TIME},
      {"time_T_ns", LogicalType::Time(true, LogicalType::TimeUnit::NANOS), Type::INT64,
       -1, false, ConvertedType::NA, true, check_TIME},
      {"time_F_ns", LogicalType::Time(false, LogicalType::TimeUnit::NANOS), Type::INT64,
       -1, false, ConvertedType::NA, true, check_TIME},
  };

  for (const SchemaElementConstructionArguments& c : time_cases) {
    this->Reconstruct<TimeLogicalType>(c)->Inspect<format::TimeType>();
  }

  auto check_TIMESTAMP = [this]() { return element_->logicalType.__isset.TIMESTAMP; };

  std::vector<SchemaElementConstructionArguments> timestamp_cases = {
      {"timestamp_T_ms", LogicalType::Timestamp(true, LogicalType::TimeUnit::MILLIS),
       Type::INT64, -1, true, ConvertedType::TIMESTAMP_MILLIS, true, check_TIMESTAMP},
      {"timestamp_F_ms", LogicalType::Timestamp(false, LogicalType::TimeUnit::MILLIS),
       Type::INT64, -1, false, ConvertedType::NA, true, check_TIMESTAMP},
      {"timestamp_F_ms_force",
       LogicalType::Timestamp(false, LogicalType::TimeUnit::MILLIS,
                              /*is_from_converted_type=*/false,
                              /*force_set_converted_type=*/true),
       Type::INT64, -1, true, ConvertedType::TIMESTAMP_MILLIS, true, check_TIMESTAMP},
      {"timestamp_T_us", LogicalType::Timestamp(true, LogicalType::TimeUnit::MICROS),
       Type::INT64, -1, true, ConvertedType::TIMESTAMP_MICROS, true, check_TIMESTAMP},
      {"timestamp_F_us", LogicalType::Timestamp(false, LogicalType::TimeUnit::MICROS),
       Type::INT64, -1, false, ConvertedType::NA, true, check_TIMESTAMP},
      // The forced-converted-type case for microseconds must use MICROS; it was
      // previously a verbatim copy of the millisecond case above.
      {"timestamp_F_us_force",
       LogicalType::Timestamp(false, LogicalType::TimeUnit::MICROS,
                              /*is_from_converted_type=*/false,
                              /*force_set_converted_type=*/true),
       Type::INT64, -1, true, ConvertedType::TIMESTAMP_MICROS, true, check_TIMESTAMP},
      {"timestamp_T_ns", LogicalType::Timestamp(true, LogicalType::TimeUnit::NANOS),
       Type::INT64, -1, false, ConvertedType::NA, true, check_TIMESTAMP},
      {"timestamp_F_ns", LogicalType::Timestamp(false, LogicalType::TimeUnit::NANOS),
       Type::INT64, -1, false, ConvertedType::NA, true, check_TIMESTAMP},
  };

  for (const SchemaElementConstructionArguments& c : timestamp_cases) {
    this->Reconstruct<TimestampLogicalType>(c)->Inspect<format::TimestampType>();
  }
}
// Extends the base fixture with checks on the bit width and signedness that
// an integer logical type writes into the Thrift element.
class TestIntegerSchemaElementConstruction : public TestSchemaElementConstruction {
 public:
  // Runs the base reconstruction, then remembers the bit width and signedness
  // of the integer logical type for Inspect(). Returns this for chaining.
  TestIntegerSchemaElementConstruction* Reconstruct(
      const SchemaElementConstructionArguments& c) {
    TestSchemaElementConstruction::Reconstruct(c);
    const auto& integer = checked_cast<const IntLogicalType&>(*c.logical_type);
    signed_ = integer.is_signed();
    width_ = integer.bit_width();
    return this;
  }

  // Base checks first, then the members of logicalType.INTEGER.
  void Inspect() {
    TestSchemaElementConstruction::Inspect();
    ASSERT_EQ(element_->logicalType.INTEGER.bitWidth, width_);
    ASSERT_EQ(element_->logicalType.INTEGER.isSigned, signed_);
  }

 protected:
  int width_;    // expected bit width
  bool signed_;  // expected signedness
};
TEST_F(TestIntegerSchemaElementConstruction, IntegerCases) {
  // Every integer case expects the INTEGER member of logicalType to be set
  auto integer_is_set = [this]() { return element_->logicalType.__isset.INTEGER; };

  std::vector<SchemaElementConstructionArguments> integer_cases = {
      // Unsigned widths
      {"uint8", LogicalType::Int(8, false), Type::INT32, -1, true, ConvertedType::UINT_8,
       true, integer_is_set},
      {"uint16", LogicalType::Int(16, false), Type::INT32, -1, true,
       ConvertedType::UINT_16, true, integer_is_set},
      {"uint32", LogicalType::Int(32, false), Type::INT32, -1, true,
       ConvertedType::UINT_32, true, integer_is_set},
      {"uint64", LogicalType::Int(64, false), Type::INT64, -1, true,
       ConvertedType::UINT_64, true, integer_is_set},
      // Signed widths
      {"int8", LogicalType::Int(8, true), Type::INT32, -1, true, ConvertedType::INT_8,
       true, integer_is_set},
      {"int16", LogicalType::Int(16, true), Type::INT32, -1, true,
       ConvertedType::INT_16, true, integer_is_set},
      {"int32", LogicalType::Int(32, true), Type::INT32, -1, true,
       ConvertedType::INT_32, true, integer_is_set},
      {"int64", LogicalType::Int(64, true), Type::INT64, -1, true,
       ConvertedType::INT_64, true, integer_is_set},
  };

  for (const auto& integer_case : integer_cases) {
    Reconstruct(integer_case)->Inspect();
  }
}
TEST(TestLogicalTypeSerialization, SchemaElementNestedCases) {
  // Confirm that the intermediate Thrift objects created during node serialization
  // contain correct ConvertedType and LogicalType information

  NodePtr string_node = PrimitiveNode::Make("string", Repetition::REQUIRED,
                                            StringLogicalType::Make(), Type::BYTE_ARRAY);
  NodePtr date_node = PrimitiveNode::Make("date", Repetition::REQUIRED,
                                          DateLogicalType::Make(), Type::INT32);
  NodePtr json_node = PrimitiveNode::Make("json", Repetition::REQUIRED,
                                          JSONLogicalType::Make(), Type::BYTE_ARRAY);
  NodePtr uuid_node =
      PrimitiveNode::Make("uuid", Repetition::REQUIRED, UUIDLogicalType::Make(),
                          Type::FIXED_LEN_BYTE_ARRAY, 16);
  NodePtr timestamp_node = PrimitiveNode::Make(
      "timestamp", Repetition::REQUIRED,
      TimestampLogicalType::Make(false, LogicalType::TimeUnit::NANOS), Type::INT64);
  NodePtr int_node = PrimitiveNode::Make("int", Repetition::REQUIRED,
                                         IntLogicalType::Make(64, false), Type::INT64);
  NodePtr decimal_node = PrimitiveNode::Make(
      "decimal", Repetition::REQUIRED, DecimalLogicalType::Make(16, 6), Type::INT64);

  NodePtr list_node = GroupNode::Make("list", Repetition::REPEATED,
                                      {string_node, date_node, json_node, uuid_node,
                                       timestamp_node, int_node, decimal_node},
                                      ListLogicalType::Make());
  std::vector<format::SchemaElement> list_elements;
  // Downcast with checked_cast: reinterpret_cast is not a valid way to convert
  // a base-class pointer into a derived-class pointer.
  ToParquet(checked_cast<GroupNode*>(list_node.get()), &list_elements);
  // One element for the group itself plus one per child; verified up front so
  // the indexing below cannot run off the end of the vector.
  ASSERT_EQ(list_elements.size(), 8u);
  ASSERT_EQ(list_elements[0].name, "list");
  ASSERT_TRUE(list_elements[0].__isset.converted_type);
  ASSERT_TRUE(list_elements[0].__isset.logicalType);
  ASSERT_EQ(list_elements[0].converted_type, ToThrift(ConvertedType::LIST));
  ASSERT_TRUE(list_elements[0].logicalType.__isset.LIST);
  ASSERT_TRUE(list_elements[1].logicalType.__isset.STRING);
  ASSERT_TRUE(list_elements[2].logicalType.__isset.DATE);
  ASSERT_TRUE(list_elements[3].logicalType.__isset.JSON);
  ASSERT_TRUE(list_elements[4].logicalType.__isset.UUID);
  ASSERT_TRUE(list_elements[5].logicalType.__isset.TIMESTAMP);
  ASSERT_TRUE(list_elements[6].logicalType.__isset.INTEGER);
  ASSERT_TRUE(list_elements[7].logicalType.__isset.DECIMAL);

  NodePtr map_node =
      GroupNode::Make("map", Repetition::REQUIRED, {}, MapLogicalType::Make());
  std::vector<format::SchemaElement> map_elements;
  ToParquet(checked_cast<GroupNode*>(map_node.get()), &map_elements);
  // The map group has no children, so only its own element is expected
  ASSERT_EQ(map_elements.size(), 1u);
  ASSERT_EQ(map_elements[0].name, "map");
  ASSERT_TRUE(map_elements[0].__isset.converted_type);
  ASSERT_TRUE(map_elements[0].__isset.logicalType);
  ASSERT_EQ(map_elements[0].converted_type, ToThrift(ConvertedType::MAP));
  ASSERT_TRUE(map_elements[0].logicalType.__isset.MAP);
}
// Builds a primitive node annotated with `logical_type`, serializes it into a
// Thrift SchemaElement, rebuilds a node from that element, and asserts that
// the rebuilt node equals the one it was built from.
static void ConfirmPrimitiveNodeRoundtrip(
    const std::shared_ptr<const LogicalType>& logical_type, Type::type physical_type,
    int physical_length) {
  // Node -> Thrift
  NodePtr source_node = PrimitiveNode::Make(
      "something", Repetition::REQUIRED, logical_type, physical_type, physical_length);
  format::SchemaElement thrift_element;
  source_node->ToParquet(&thrift_element);

  // Thrift -> Node
  std::unique_ptr<Node> rebuilt_node = PrimitiveNode::FromParquet(&thrift_element, 1);

  ASSERT_TRUE(source_node->Equals(rebuilt_node.get()))
      << "Recovered primitive node unexpectedly not equivalent to original primitive "
         "node constructed with logical type "
      << logical_type->ToString();
}
static void ConfirmGroupNodeRoundtrip(
std::string name, const std::shared_ptr<const LogicalType>& logical_type) {
NodeVector node_vector;
std::shared_ptr<Node> original =
GroupNode::Make(name, Repetition::REQUIRED, node_vector, logical_type);
std::vector<format::SchemaElement> elements;
ToParquet(reinterpret_cast<GroupNode*>(original.get()), &elements);
std::unique_ptr<Node> recovered =
GroupNode::FromParquet(&(elements[0]), 1, node_vector);
ASSERT_TRUE(original->Equals(recovered.get()))
<< "Recovered group node unexpectedly not equivalent to original group node "
"constructed with logical type "
<< logical_type->ToString();
return;
}
TEST(TestLogicalTypeSerialization, Roundtrips) {
  // Nodes annotated with a logical type must survive a Thrift
  // serialization/deserialization cycle and compare equal to the original

  // Inputs for one primitive-node roundtrip
  struct PrimitiveRoundtripCase {
    std::shared_ptr<const LogicalType> logical_type;
    Type::type physical_type;
    int physical_length;
  };

  std::vector<PrimitiveRoundtripCase> primitive_cases = {
      {LogicalType::String(), Type::BYTE_ARRAY, -1},
      {LogicalType::Enum(), Type::BYTE_ARRAY, -1},
      {LogicalType::Decimal(16, 6), Type::INT64, -1},
      {LogicalType::Date(), Type::INT32, -1},
      {LogicalType::Time(true, LogicalType::TimeUnit::MILLIS), Type::INT32, -1},
      {LogicalType::Time(true, LogicalType::TimeUnit::MICROS), Type::INT64, -1},
      {LogicalType::Time(true, LogicalType::TimeUnit::NANOS), Type::INT64, -1},
      {LogicalType::Time(false, LogicalType::TimeUnit::MILLIS), Type::INT32, -1},
      {LogicalType::Time(false, LogicalType::TimeUnit::MICROS), Type::INT64, -1},
      {LogicalType::Time(false, LogicalType::TimeUnit::NANOS), Type::INT64, -1},
      {LogicalType::Timestamp(true, LogicalType::TimeUnit::MILLIS), Type::INT64, -1},
      {LogicalType::Timestamp(true, LogicalType::TimeUnit::MICROS), Type::INT64, -1},
      {LogicalType::Timestamp(true, LogicalType::TimeUnit::NANOS), Type::INT64, -1},
      {LogicalType::Timestamp(false, LogicalType::TimeUnit::MILLIS), Type::INT64, -1},
      {LogicalType::Timestamp(false, LogicalType::TimeUnit::MICROS), Type::INT64, -1},
      {LogicalType::Timestamp(false, LogicalType::TimeUnit::NANOS), Type::INT64, -1},
      {LogicalType::Interval(), Type::FIXED_LEN_BYTE_ARRAY, 12},
      {LogicalType::Int(8, false), Type::INT32, -1},
      {LogicalType::Int(16, false), Type::INT32, -1},
      {LogicalType::Int(32, false), Type::INT32, -1},
      {LogicalType::Int(64, false), Type::INT64, -1},
      {LogicalType::Int(8, true), Type::INT32, -1},
      {LogicalType::Int(16, true), Type::INT32, -1},
      {LogicalType::Int(32, true), Type::INT32, -1},
      {LogicalType::Int(64, true), Type::INT64, -1},
      {LogicalType::Null(), Type::BOOLEAN, -1},
      {LogicalType::JSON(), Type::BYTE_ARRAY, -1},
      {LogicalType::BSON(), Type::BYTE_ARRAY, -1},
      {LogicalType::UUID(), Type::FIXED_LEN_BYTE_ARRAY, 16},
      {LogicalType::None(), Type::BOOLEAN, -1}};

  for (const auto& primitive : primitive_cases) {
    ConfirmPrimitiveNodeRoundtrip(primitive.logical_type, primitive.physical_type,
                                  primitive.physical_length);
  }

  // Nested (group) logical types
  ConfirmGroupNodeRoundtrip("map", LogicalType::Map());
  ConfirmGroupNodeRoundtrip("list", LogicalType::List());
}
} // namespace schema
} // namespace parquet