// blob: 626e58c23af6135b2f54695eacc8f6d68df8e6da [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#include "./arrow_types.h"
#if defined(ARROW_R_WITH_ARROW)
#include <arrow/table.h>
#include <parquet/arrow/reader.h>
#include <parquet/arrow/writer.h>
#include <parquet/exception.h>
RCPP_EXPOSED_ENUM_NODECL(parquet::ParquetVersion::type)
// Builds a fresh parquet::ArrowReaderProperties, forwarding `use_threads` to
// its constructor, and hands it back inside a shared_ptr.
// [[arrow::export]]
std::shared_ptr<parquet::ArrowReaderProperties>
parquet___arrow___ArrowReaderProperties__Make(bool use_threads) {
  auto reader_properties = std::make_shared<parquet::ArrowReaderProperties>(use_threads);
  return reader_properties;
}
// Forwards `use_threads` to ArrowReaderProperties::set_use_threads on the
// object held by `properties`. `properties` is dereferenced unconditionally.
// [[arrow::export]]
void parquet___arrow___ArrowReaderProperties__set_use_threads(
    const std::shared_ptr<parquet::ArrowReaderProperties>& properties, bool use_threads) {
  parquet::ArrowReaderProperties& target = *properties;
  target.set_use_threads(use_threads);
}
// Returns the value reported by ArrowReaderProperties::use_threads() for the
// object held by `properties`.
//
// NOTE(review): the `use_threads` parameter is accepted but never read by this
// function; it looks like a leftover from the setter's signature. It is kept
// so that existing callers keep working -- confirm before removing it.
// [[arrow::export]]
bool parquet___arrow___ArrowReaderProperties__get_use_threads(
    const std::shared_ptr<parquet::ArrowReaderProperties>& properties, bool use_threads) {
  const bool threads_enabled = properties->use_threads();
  return threads_enabled;
}
// Returns what ArrowReaderProperties::read_dictionary() reports for the column
// at `column_index`. The index is forwarded as-is; no range check happens here.
// [[arrow::export]]
bool parquet___arrow___ArrowReaderProperties__get_read_dictionary(
    const std::shared_ptr<parquet::ArrowReaderProperties>& properties, int column_index) {
  const bool reads_as_dictionary = properties->read_dictionary(column_index);
  return reads_as_dictionary;
}
// Forwards (`column_index`, `read_dict`) to
// ArrowReaderProperties::set_read_dictionary on the object held by
// `properties`. The index is passed through without validation.
// [[arrow::export]]
void parquet___arrow___ArrowReaderProperties__set_read_dictionary(
    const std::shared_ptr<parquet::ArrowReaderProperties>& properties, int column_index,
    bool read_dict) {
  parquet::ArrowReaderProperties& target = *properties;
  target.set_read_dictionary(column_index, read_dict);
}
// Opens `file` with a parquet::arrow::FileReaderBuilder, applies the reader
// properties in `props`, and returns the built FileReader with shared
// ownership. A non-OK status from Open() or Build() is raised through
// PARQUET_THROW_NOT_OK. `props` is dereferenced unconditionally.
// [[arrow::export]]
std::shared_ptr<parquet::arrow::FileReader> parquet___arrow___FileReader__OpenFile(
    const std::shared_ptr<arrow::io::RandomAccessFile>& file,
    const std::shared_ptr<parquet::ArrowReaderProperties>& props) {
  parquet::arrow::FileReaderBuilder builder;
  PARQUET_THROW_NOT_OK(builder.Open(file));

  // properties() hands back the builder pointer that Build() is then invoked on.
  auto* configured = builder.properties(*props);
  std::unique_ptr<parquet::arrow::FileReader> built;
  PARQUET_THROW_NOT_OK(configured->Build(&built));

  // Transfer ownership from the unique_ptr into the shared_ptr return value.
  return std::shared_ptr<parquet::arrow::FileReader>(std::move(built));
}
// Reads through `reader` with the single-argument FileReader::ReadTable
// overload and returns the resulting table. A non-OK status is raised through
// PARQUET_THROW_NOT_OK.
// [[arrow::export]]
std::shared_ptr<arrow::Table> parquet___arrow___FileReader__ReadTable1(
    const std::shared_ptr<parquet::arrow::FileReader>& reader) {
  std::shared_ptr<arrow::Table> result;
  const auto status = reader->ReadTable(&result);
  PARQUET_THROW_NOT_OK(status);
  return result;
}
// Reads through `reader` with the FileReader::ReadTable overload that takes
// `column_indices`, and returns the resulting table. The indices are forwarded
// unchanged. A non-OK status is raised through PARQUET_THROW_NOT_OK.
// [[arrow::export]]
std::shared_ptr<arrow::Table> parquet___arrow___FileReader__ReadTable2(
    const std::shared_ptr<parquet::arrow::FileReader>& reader,
    const std::vector<int>& column_indices) {
  std::shared_ptr<arrow::Table> result;
  const auto status = reader->ReadTable(column_indices, &result);
  PARQUET_THROW_NOT_OK(status);
  return result;
}
// Returns num_rows() from the metadata of the Parquet reader that underlies
// `reader`.
// [[arrow::export]]
int64_t parquet___arrow___FileReader__num_rows(
    const std::shared_ptr<parquet::arrow::FileReader>& reader) {
  const auto file_metadata = reader->parquet_reader()->metadata();
  return file_metadata->num_rows();
}
namespace parquet {
// Thin subclass of WriterProperties::Builder: inherits every base-class
// constructor and adds no members or behaviour of its own.
// NOTE(review): presumably this exists so the R bindings have a distinct,
// nameable type to hold in a shared_ptr -- confirm against arrow_types.h.
class WriterPropertiesBuilder : public WriterProperties::Builder {
public:
using WriterProperties::Builder::Builder;
};
// Same pattern for ArrowWriterProperties::Builder: inherits the base-class
// constructors and adds nothing else.
class ArrowWriterPropertiesBuilder : public ArrowWriterProperties::Builder {
public:
using ArrowWriterProperties::Builder::Builder;
};
} // namespace parquet
// [[arrow::export]]
std::shared_ptr<parquet::ArrowWriterProperties> parquet___ArrowWriterProperties___create(
bool allow_truncated_timestamps, bool use_deprecated_int96_timestamps,
int timestamp_unit) {
auto builder = std::make_shared<parquet::ArrowWriterPropertiesBuilder>();
builder->store_schema();
if (allow_truncated_timestamps) {
builder->allow_truncated_timestamps();
}
if (use_deprecated_int96_timestamps) {
builder->enable_deprecated_int96_timestamps();
}
if (timestamp_unit > -1) {
// -1 is passed in for NULL/default
builder->coerce_timestamps(static_cast<arrow::TimeUnit::type>(timestamp_unit));
}
return builder->build();
}
// Default-constructs a parquet::WriterPropertiesBuilder and returns it inside
// a shared_ptr.
// [[arrow::export]]
std::shared_ptr<parquet::WriterPropertiesBuilder>
parquet___WriterProperties___Builder__create() {
  auto new_builder = std::make_shared<parquet::WriterPropertiesBuilder>();
  return new_builder;
}
// Forwards `version` to the builder's version() setter. The value returned by
// the setter is discarded.
// [[arrow::export]]
void parquet___WriterProperties___Builder__version(
    const std::shared_ptr<parquet::WriterPropertiesBuilder>& builder,
    const parquet::ParquetVersion::type& version) {
  parquet::WriterPropertiesBuilder& target = *builder;
  target.version(version);
}
// Configures compression codecs on `builder`.
//
// When `types` holds exactly one value it is applied through the builder's
// single-argument compression() call and `paths` is ignored. Otherwise
// `types[i]` is applied to the column path `paths[i]` for every i.
//
// Each integer in `types` is cast to arrow::Compression::type without
// validation.
//
// Raises an R error (Rcpp::stop) when `paths` has fewer entries than `types`
// in the per-column case; previously that situation read past the end of
// `paths`.
// [[arrow::export]]
void parquet___ArrowWriterProperties___Builder__set_compressions(
    const std::shared_ptr<parquet::WriterPropertiesBuilder>& builder,
    const std::vector<std::string>& paths, const Rcpp::IntegerVector& types) {
  auto n = types.size();
  if (n == 1) {
    builder->compression(static_cast<arrow::Compression::type>(types[0]));
    return;
  }

  // Guard the paths[i] accesses below before touching the builder.
  if (paths.size() < static_cast<std::size_t>(n)) {
    Rcpp::stop("set_compressions: `paths` has fewer entries than `types`");
  }

  for (decltype(n) i = 0; i < n; i++) {
    builder->compression(paths[i], static_cast<arrow::Compression::type>(types[i]));
  }
}
// Configures compression levels on `builder`.
//
// When `levels` holds exactly one value it is applied through the builder's
// single-argument compression_level() call and `paths` is ignored. Otherwise
// `levels[i]` is applied to the column path `paths[i]` for every i.
//
// Raises an R error (Rcpp::stop) when `paths` has fewer entries than `levels`
// in the per-column case; previously that situation read past the end of
// `paths`.
// [[arrow::export]]
void parquet___ArrowWriterProperties___Builder__set_compression_levels(
    const std::shared_ptr<parquet::WriterPropertiesBuilder>& builder,
    const std::vector<std::string>& paths, const Rcpp::IntegerVector& levels) {
  auto n = levels.size();
  if (n == 1) {
    builder->compression_level(levels[0]);
    return;
  }

  // Guard the paths[i] accesses below before touching the builder.
  if (paths.size() < static_cast<std::size_t>(n)) {
    Rcpp::stop("set_compression_levels: `paths` has fewer entries than `levels`");
  }

  for (decltype(n) i = 0; i < n; i++) {
    builder->compression_level(paths[i], levels[i]);
  }
}
// Configures dictionary encoding on `builder`.
//
// When `use_dictionary` holds exactly one value, the no-argument
// enable_dictionary()/disable_dictionary() call matching that value is made
// and `paths` is ignored. Otherwise the no-argument disable_dictionary() is
// called first, and then each `paths[i]` is enabled or disabled individually
// according to `use_dictionary[i]`.
//
// Raises an R error (Rcpp::stop) when `paths` has fewer entries than
// `use_dictionary` in the per-column case; previously that situation read
// past the end of `paths`. The check runs before the builder is modified.
// [[arrow::export]]
void parquet___ArrowWriterProperties___Builder__set_use_dictionary(
    const std::shared_ptr<parquet::WriterPropertiesBuilder>& builder,
    const std::vector<std::string>& paths, const Rcpp::LogicalVector& use_dictionary) {
  auto n = use_dictionary.size();
  if (n == 1) {
    if (use_dictionary[0]) {
      builder->enable_dictionary();
    } else {
      builder->disable_dictionary();
    }
    return;
  }

  // Guard the paths[i] accesses below before touching the builder.
  if (paths.size() < static_cast<std::size_t>(n)) {
    Rcpp::stop("set_use_dictionary: `paths` has fewer entries than `use_dictionary`");
  }

  builder->disable_dictionary();
  for (decltype(n) i = 0; i < n; i++) {
    if (use_dictionary[i]) {
      builder->enable_dictionary(paths[i]);
    } else {
      builder->disable_dictionary(paths[i]);
    }
  }
}
// Configures statistics writing on `builder`.
//
// When `write_statistics` holds exactly one value, the no-argument
// enable_statistics()/disable_statistics() call matching that value is made
// and `paths` is ignored. Otherwise the no-argument disable_statistics() is
// called first, and then each `paths[i]` is enabled or disabled individually
// according to `write_statistics[i]`.
//
// Raises an R error (Rcpp::stop) when `paths` has fewer entries than
// `write_statistics` in the per-column case; previously that situation read
// past the end of `paths`. The check runs before the builder is modified.
// [[arrow::export]]
void parquet___ArrowWriterProperties___Builder__set_write_statistics(
    const std::shared_ptr<parquet::WriterPropertiesBuilder>& builder,
    const std::vector<std::string>& paths, const Rcpp::LogicalVector& write_statistics) {
  auto n = write_statistics.size();
  if (n == 1) {
    if (write_statistics[0]) {
      builder->enable_statistics();
    } else {
      builder->disable_statistics();
    }
    return;
  }

  // Guard the paths[i] accesses below before touching the builder.
  if (paths.size() < static_cast<std::size_t>(n)) {
    Rcpp::stop("set_write_statistics: `paths` has fewer entries than `write_statistics`");
  }

  builder->disable_statistics();
  for (decltype(n) i = 0; i < n; i++) {
    if (write_statistics[i]) {
      builder->enable_statistics(paths[i]);
    } else {
      builder->disable_statistics(paths[i]);
    }
  }
}
// Forwards `data_page_size` to the builder's data_pagesize() setter. The value
// is passed through unchanged and the setter's return value is discarded.
// [[arrow::export]]
void parquet___ArrowWriterProperties___Builder__data_page_size(
    const std::shared_ptr<parquet::WriterPropertiesBuilder>& builder,
    int64_t data_page_size) {
  parquet::WriterPropertiesBuilder& target = *builder;
  target.data_pagesize(data_page_size);
}
// Calls build() on `builder` and returns the resulting
// parquet::WriterProperties.
// [[arrow::export]]
std::shared_ptr<parquet::WriterProperties> parquet___WriterProperties___Builder__build(
    const std::shared_ptr<parquet::WriterPropertiesBuilder>& builder) {
  auto writer_properties = builder->build();
  return writer_properties;
}
// Opens a parquet::arrow::FileWriter for `schema` that writes to `sink`, using
// Arrow's default memory pool and the two supplied property objects, and
// returns it with shared ownership. A non-OK status from FileWriter::Open is
// raised through PARQUET_THROW_NOT_OK. `schema` is dereferenced
// unconditionally.
// [[arrow::export]]
std::shared_ptr<parquet::arrow::FileWriter> parquet___arrow___ParquetFileWriter__Open(
    const std::shared_ptr<arrow::Schema>& schema,
    const std::shared_ptr<arrow::io::OutputStream>& sink,
    const std::shared_ptr<parquet::WriterProperties>& properties,
    const std::shared_ptr<parquet::ArrowWriterProperties>& arrow_properties) {
  auto* pool = arrow::default_memory_pool();
  std::unique_ptr<parquet::arrow::FileWriter> opened;
  const auto status = parquet::arrow::FileWriter::Open(*schema, pool, sink, properties,
                                                       arrow_properties, &opened);
  PARQUET_THROW_NOT_OK(status);

  // Transfer ownership from the unique_ptr into the shared_ptr return value.
  return std::shared_ptr<parquet::arrow::FileWriter>(std::move(opened));
}
// Writes `table` through `writer` via FileWriter::WriteTable, forwarding
// `chunk_size` unchanged. A non-OK status is raised through
// PARQUET_THROW_NOT_OK. `table` is dereferenced unconditionally.
// [[arrow::export]]
void parquet___arrow___FileWriter__WriteTable(
    const std::shared_ptr<parquet::arrow::FileWriter>& writer,
    const std::shared_ptr<arrow::Table>& table, int64_t chunk_size) {
  const arrow::Table& data = *table;
  const auto status = writer->WriteTable(data, chunk_size);
  PARQUET_THROW_NOT_OK(status);
}
// Calls Close() on `writer`; a non-OK status is raised through
// PARQUET_THROW_NOT_OK.
// [[arrow::export]]
void parquet___arrow___FileWriter__Close(
    const std::shared_ptr<parquet::arrow::FileWriter>& writer) {
  const auto status = writer->Close();
  PARQUET_THROW_NOT_OK(status);
}
// Writes `table` to `sink` in one call to parquet::arrow::WriteTable, using
// Arrow's default memory pool and the two supplied property objects. The
// table's own row count is passed as the fourth argument (the chunk size in
// the parquet::arrow::WriteTable API). A non-OK status is raised through
// PARQUET_THROW_NOT_OK. `table` is dereferenced unconditionally.
// [[arrow::export]]
void parquet___arrow___WriteTable(
    const std::shared_ptr<arrow::Table>& table,
    const std::shared_ptr<arrow::io::OutputStream>& sink,
    const std::shared_ptr<parquet::WriterProperties>& properties,
    const std::shared_ptr<parquet::ArrowWriterProperties>& arrow_properties) {
  const arrow::Table& data = *table;
  const auto total_rows = data.num_rows();
  auto* pool = arrow::default_memory_pool();
  PARQUET_THROW_NOT_OK(parquet::arrow::WriteTable(data, pool, sink, total_rows,
                                                  properties, arrow_properties));
}
// Asks `reader` for its Arrow schema via FileReader::GetSchema and returns it.
// The resulting status is checked with StopIfNotOk (the other wrappers in this
// file use PARQUET_THROW_NOT_OK instead).
// [[arrow::export]]
std::shared_ptr<arrow::Schema> parquet___arrow___FileReader__GetSchema(
    const std::shared_ptr<parquet::arrow::FileReader>& reader) {
  std::shared_ptr<arrow::Schema> result;
  StopIfNotOk(reader->GetSchema(&result));
  return result;
}
#endif