PARQUET-1164: [C++] Account for API changes in ARROW-1808
Author: Wes McKinney <wes.mckinney@twosigma.com>
Closes #418 from wesm/PARQUET-1164 and squashes the following commits:
ca18e60 [Wes McKinney] Bump Arrow version to include ARROW-1808
d580b4f [Wes McKinney] Refactor to account for ARROW-1808
diff --git a/cmake_modules/ThirdpartyToolchain.cmake b/cmake_modules/ThirdpartyToolchain.cmake
index fe1d499..53630e6 100644
--- a/cmake_modules/ThirdpartyToolchain.cmake
+++ b/cmake_modules/ThirdpartyToolchain.cmake
@@ -366,7 +366,7 @@
-DARROW_BUILD_TESTS=OFF)
if ("$ENV{PARQUET_ARROW_VERSION}" STREQUAL "")
- set(ARROW_VERSION "f2806fa518583907a129b2ecb0b7ec8758b69e17")
+ set(ARROW_VERSION "fc4e2c36d2c56a8bd5d1ab17eeb406826924d3e5")
else()
set(ARROW_VERSION "$ENV{PARQUET_ARROW_VERSION}")
endif()
diff --git a/src/parquet/arrow/arrow-reader-writer-benchmark.cc b/src/parquet/arrow/arrow-reader-writer-benchmark.cc
index a54fb5d..edeef1e 100644
--- a/src/parquet/arrow/arrow-reader-writer-benchmark.cc
+++ b/src/parquet/arrow/arrow-reader-writer-benchmark.cc
@@ -112,11 +112,9 @@
EXIT_NOT_OK(builder.Finish(&array));
auto field = ::arrow::field("column", type, nullable);
- auto schema = std::make_shared<::arrow::Schema>(
- std::vector<std::shared_ptr<::arrow::Field>>({field}));
+ auto schema = ::arrow::schema({field});
auto column = std::make_shared<::arrow::Column>(field, array);
- return std::make_shared<::arrow::Table>(
- schema, std::vector<std::shared_ptr<::arrow::Column>>({column}));
+ return ::arrow::Table::Make(schema, {column});
}
template <>
@@ -139,8 +137,7 @@
auto schema = std::make_shared<::arrow::Schema>(
std::vector<std::shared_ptr<::arrow::Field>>({field}));
auto column = std::make_shared<::arrow::Column>(field, array);
- return std::make_shared<::arrow::Table>(
- schema, std::vector<std::shared_ptr<::arrow::Column>>({column}));
+ return ::arrow::Table::Make(schema, {column});
}
template <bool nullable, typename ParquetType>
diff --git a/src/parquet/arrow/arrow-reader-writer-test.cc b/src/parquet/arrow/arrow-reader-writer-test.cc
index 0e0831e..a8d3824 100644
--- a/src/parquet/arrow/arrow-reader-writer-test.cc
+++ b/src/parquet/arrow/arrow-reader-writer-test.cc
@@ -23,8 +23,8 @@
#include "gtest/gtest.h"
-#include <sstream>
#include <arrow/compute/api.h>
+#include <sstream>
#include "parquet/api/reader.h"
#include "parquet/api/writer.h"
@@ -1145,7 +1145,7 @@
std::make_shared<Column>("f0", a0), std::make_shared<Column>("f1", a1),
std::make_shared<Column>("f2", a2), std::make_shared<Column>("f3", a3),
std::make_shared<Column>("f4", a4), std::make_shared<Column>("f5", a5)};
- *out = std::make_shared<::arrow::Table>(schema, columns);
+ *out = Table::Make(schema, columns);
}
TEST(TestArrowReadWrite, DateTimeTypes) {
@@ -1199,31 +1199,28 @@
auto s1 = std::shared_ptr<::arrow::Schema>(
new ::arrow::Schema({field("f_s", t_s), field("f_ms", t_ms), field("f_us", t_us),
field("f_ns", t_ns)}));
- auto input = std::make_shared<::arrow::Table>(
- s1, ColumnVector({std::make_shared<Column>("f_s", a_s),
- std::make_shared<Column>("f_ms", a_ms),
- std::make_shared<Column>("f_us", a_us),
- std::make_shared<Column>("f_ns", a_ns)}));
+ auto input = Table::Make(
+ s1,
+ {std::make_shared<Column>("f_s", a_s), std::make_shared<Column>("f_ms", a_ms),
+ std::make_shared<Column>("f_us", a_us), std::make_shared<Column>("f_ns", a_ns)});
// Result when coercing to milliseconds
auto s2 = std::shared_ptr<::arrow::Schema>(
new ::arrow::Schema({field("f_s", t_ms), field("f_ms", t_ms), field("f_us", t_ms),
field("f_ns", t_ms)}));
- auto ex_milli_result = std::make_shared<::arrow::Table>(
- s2, ColumnVector({std::make_shared<Column>("f_s", a_ms),
- std::make_shared<Column>("f_ms", a_ms),
- std::make_shared<Column>("f_us", a_ms),
- std::make_shared<Column>("f_ns", a_ms)}));
+ auto ex_milli_result = Table::Make(
+ s2,
+ {std::make_shared<Column>("f_s", a_ms), std::make_shared<Column>("f_ms", a_ms),
+ std::make_shared<Column>("f_us", a_ms), std::make_shared<Column>("f_ns", a_ms)});
// Result when coercing to microseconds
auto s3 = std::shared_ptr<::arrow::Schema>(
new ::arrow::Schema({field("f_s", t_us), field("f_ms", t_us), field("f_us", t_us),
field("f_ns", t_us)}));
- auto ex_micro_result = std::make_shared<::arrow::Table>(
- s3, ColumnVector({std::make_shared<Column>("f_s", a_us),
- std::make_shared<Column>("f_ms", a_us),
- std::make_shared<Column>("f_us", a_us),
- std::make_shared<Column>("f_ns", a_us)}));
+ auto ex_micro_result = Table::Make(
+ s3,
+ {std::make_shared<Column>("f_s", a_us), std::make_shared<Column>("f_ms", a_us),
+ std::make_shared<Column>("f_us", a_us), std::make_shared<Column>("f_ns", a_us)});
std::shared_ptr<Table> milli_result;
DoSimpleRoundtrip(
@@ -1276,10 +1273,10 @@
auto c3 = std::make_shared<Column>("f_us", a_us);
auto c4 = std::make_shared<Column>("f_ns", a_ns);
- auto t1 = std::make_shared<::arrow::Table>(s1, ColumnVector({c1}));
- auto t2 = std::make_shared<::arrow::Table>(s2, ColumnVector({c2}));
- auto t3 = std::make_shared<::arrow::Table>(s3, ColumnVector({c3}));
- auto t4 = std::make_shared<::arrow::Table>(s4, ColumnVector({c4}));
+ auto t1 = Table::Make(s1, {c1});
+ auto t2 = Table::Make(s2, {c2});
+ auto t3 = Table::Make(s3, {c3});
+ auto t4 = Table::Make(s4, {c4});
auto sink = std::make_shared<InMemoryOutputStream>();
@@ -1327,7 +1324,7 @@
std::vector<std::shared_ptr<::arrow::Column>> columns = {
std::make_shared<Column>("f0", a0), std::make_shared<Column>("f1", a1)};
- auto table = std::make_shared<::arrow::Table>(schema, columns);
+ auto table = Table::Make(schema, columns);
// Expected schema and values
auto e0 = field("f0", ::arrow::date32());
@@ -1341,7 +1338,7 @@
std::vector<std::shared_ptr<::arrow::Column>> ex_columns = {
std::make_shared<Column>("f0", x0), std::make_shared<Column>("f1", x1)};
- auto ex_table = std::make_shared<::arrow::Table>(ex_schema, ex_columns);
+ auto ex_table = Table::Make(ex_schema, ex_columns);
std::shared_ptr<Table> result;
DoSimpleRoundtrip(table, 1, table->num_rows(), {}, &result);
@@ -1372,7 +1369,7 @@
fields[i] = column->field();
}
auto schema = std::make_shared<::arrow::Schema>(fields);
- *out = std::make_shared<Table>(schema, columns);
+ *out = Table::Make(schema, columns);
}
TEST(TestArrowReadWrite, MultithreadedRead) {
@@ -1459,9 +1456,9 @@
ex_fields.push_back(table->column(i)->field());
}
- auto ex_schema = std::make_shared<::arrow::Schema>(ex_fields);
- Table expected(ex_schema, ex_columns);
- AssertTablesEqual(expected, *result);
+ auto ex_schema = ::arrow::schema(ex_fields);
+ auto expected = Table::Make(ex_schema, ex_columns);
+ AssertTablesEqual(*expected, *result);
}
void MakeListTable(int num_rows, std::shared_ptr<Table>* out) {
@@ -1501,7 +1498,7 @@
auto f1 = ::arrow::field("a", ::arrow::list(::arrow::int8()));
auto schema = ::arrow::schema({f1});
std::vector<std::shared_ptr<Array>> arrays = {list_array};
- *out = std::make_shared<Table>(schema, arrays);
+ *out = Table::Make(schema, arrays);
}
TEST(TestArrowReadWrite, ListLargeRecords) {
@@ -1544,7 +1541,7 @@
auto chunked_col =
std::make_shared<::arrow::Column>(table->schema()->field(0), chunked);
std::vector<std::shared_ptr<::arrow::Column>> columns = {chunked_col};
- auto chunked_table = std::make_shared<Table>(table->schema(), columns);
+ auto chunked_table = Table::Make(table->schema(), columns);
ASSERT_TRUE(table->Equals(*chunked_table));
}
diff --git a/src/parquet/arrow/arrow-schema-test.cc b/src/parquet/arrow/arrow-schema-test.cc
index 7ed9ad8..129eccf 100644
--- a/src/parquet/arrow/arrow-schema-test.cc
+++ b/src/parquet/arrow/arrow-schema-test.cc
@@ -62,8 +62,8 @@
for (int i = 0; i < expected_schema->num_fields(); ++i) {
auto lhs = result_schema_->field(i);
auto rhs = expected_schema->field(i);
- EXPECT_TRUE(lhs->Equals(rhs))
- << i << " " << lhs->ToString() << " != " << rhs->ToString();
+ EXPECT_TRUE(lhs->Equals(rhs)) << i << " " << lhs->ToString()
+ << " != " << rhs->ToString();
}
}
diff --git a/src/parquet/arrow/reader.cc b/src/parquet/arrow/reader.cc
index 3ca49cb..e13a094 100644
--- a/src/parquet/arrow/reader.cc
+++ b/src/parquet/arrow/reader.cc
@@ -431,7 +431,7 @@
RETURN_NOT_OK(ParallelFor(nthreads, num_columns, ReadColumnFunc));
}
- *out = std::make_shared<Table>(schema, columns);
+ *out = Table::Make(schema, columns);
return Status::OK();
}
@@ -466,7 +466,7 @@
RETURN_NOT_OK(ParallelFor(nthreads, num_fields, ReadColumnFunc));
}
- *table = std::make_shared<Table>(schema, columns);
+ *table = Table::Make(schema, columns);
return Status::OK();
}
diff --git a/src/parquet/arrow/test-util.h b/src/parquet/arrow/test-util.h
index 8611a30..7264324 100644
--- a/src/parquet/arrow/test-util.h
+++ b/src/parquet/arrow/test-util.h
@@ -414,7 +414,7 @@
std::vector<std::shared_ptr<::arrow::Column>> columns({column});
std::vector<std::shared_ptr<::arrow::Field>> fields({column->field()});
auto schema = std::make_shared<::arrow::Schema>(fields);
- return std::make_shared<::arrow::Table>(schema, columns);
+ return ::arrow::Table::Make(schema, columns);
}
template <typename T>
diff --git a/src/parquet/file/reader.cc b/src/parquet/file/reader.cc
index 4ec48a4..9b9bde9 100644
--- a/src/parquet/file/reader.cc
+++ b/src/parquet/file/reader.cc
@@ -45,9 +45,9 @@
: contents_(std::move(contents)) {}
std::shared_ptr<ColumnReader> RowGroupReader::Column(int i) {
- DCHECK(i < metadata()->num_columns())
- << "The RowGroup only has " << metadata()->num_columns()
- << "columns, requested column: " << i;
+ DCHECK(i < metadata()->num_columns()) << "The RowGroup only has "
+ << metadata()->num_columns()
+ << "columns, requested column: " << i;
const ColumnDescriptor* descr = metadata()->schema()->Column(i);
std::unique_ptr<PageReader> page_reader = contents_->GetColumnPageReader(i);
@@ -57,9 +57,9 @@
}
std::unique_ptr<PageReader> RowGroupReader::GetColumnPageReader(int i) {
- DCHECK(i < metadata()->num_columns())
- << "The RowGroup only has " << metadata()->num_columns()
- << "columns, requested column: " << i;
+ DCHECK(i < metadata()->num_columns()) << "The RowGroup only has "
+ << metadata()->num_columns()
+ << "columns, requested column: " << i;
return contents_->GetColumnPageReader(i);
}
@@ -127,9 +127,9 @@
}
std::shared_ptr<RowGroupReader> ParquetFileReader::RowGroup(int i) {
- DCHECK(i < metadata()->num_row_groups())
- << "The file only has " << metadata()->num_row_groups()
- << "row groups, requested reader for: " << i;
+ DCHECK(i < metadata()->num_row_groups()) << "The file only has "
+ << metadata()->num_row_groups()
+ << "row groups, requested reader for: " << i;
return contents_->GetRowGroup(i);
}