| /* |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, |
| * software distributed under the License is distributed on an |
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| * KIND, either express or implied. See the License for the |
| * specific language governing permissions and limitations |
| * under the License. |
| */ |
| |
| #ifdef HAVE_CONFIG_H |
| # include <config.h> |
| #endif |
| |
| #include <arrow-glib/error.hpp> |
| #include <arrow-glib/input-stream.hpp> |
| #include <arrow-glib/orc-file-reader.hpp> |
| #include <arrow-glib/record-batch.hpp> |
| #include <arrow-glib/schema.hpp> |
| #include <arrow-glib/table.hpp> |
| |
| G_BEGIN_DECLS |
| |
| /** |
| * SECTION: orc-file-reader |
| * @section_id: orc-file-reader |
| * @title: ORC reader |
| * @include: arrow-glib/orc-file-reader.h |
| * |
| * #GArrowORCFileReader is a class for reading stripes in ORC file |
| * format from input. |
| */ |
| |
| typedef struct GArrowORCFileReaderPrivate_ { |
| GArrowSeekableInputStream *input; |
| arrow::adapters::orc::ORCFileReader *orc_file_reader; |
| GArray *field_indices; |
| } GArrowORCFileReaderPrivate; |
| |
/*
 * GObject property IDs of #GArrowORCFileReader.
 * PROP_0 is only a placeholder so that the real IDs start at 1.
 */
enum {
  PROP_0 = 0,
  PROP_INPUT = 1,
  PROP_ORC_FILE_READER = 2
};
| |
| G_DEFINE_TYPE_WITH_PRIVATE(GArrowORCFileReader, |
| garrow_orc_file_reader, |
| G_TYPE_OBJECT); |
| |
| #define GARROW_ORC_FILE_READER_GET_PRIVATE(obj) \ |
| static_cast<GArrowORCFileReaderPrivate *>( \ |
| garrow_orc_file_reader_get_instance_private( \ |
| GARROW_ORC_FILE_READER(obj))) |
| |
| static void |
| garrow_orc_file_reader_dispose(GObject *object) |
| { |
| auto priv = GARROW_ORC_FILE_READER_GET_PRIVATE(object); |
| |
| if (priv->input) { |
| g_object_unref(priv->input); |
| priv->input = NULL; |
| } |
| |
| G_OBJECT_CLASS(garrow_orc_file_reader_parent_class)->dispose(object); |
| } |
| |
| static void |
| garrow_orc_file_reader_finalize(GObject *object) |
| { |
| auto priv = GARROW_ORC_FILE_READER_GET_PRIVATE(object); |
| |
| delete priv->orc_file_reader; |
| |
| if (priv->field_indices) { |
| g_array_free(priv->field_indices, TRUE); |
| } |
| |
| G_OBJECT_CLASS(garrow_orc_file_reader_parent_class)->finalize(object); |
| } |
| |
| static void |
| garrow_orc_file_reader_set_property(GObject *object, |
| guint prop_id, |
| const GValue *value, |
| GParamSpec *pspec) |
| { |
| auto priv = GARROW_ORC_FILE_READER_GET_PRIVATE(object); |
| |
| switch (prop_id) { |
| case PROP_INPUT: |
| priv->input = GARROW_SEEKABLE_INPUT_STREAM(g_value_dup_object(value)); |
| break; |
| case PROP_ORC_FILE_READER: |
| priv->orc_file_reader = |
| static_cast<arrow::adapters::orc::ORCFileReader *>(g_value_get_pointer(value)); |
| break; |
| default: |
| G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); |
| break; |
| } |
| } |
| |
| static void |
| garrow_orc_file_reader_get_property(GObject *object, |
| guint prop_id, |
| GValue *value, |
| GParamSpec *pspec) |
| { |
| auto priv = GARROW_ORC_FILE_READER_GET_PRIVATE(object); |
| |
| switch (prop_id) { |
| case PROP_INPUT: |
| g_value_set_object(value, priv->input); |
| break; |
| default: |
| G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); |
| break; |
| } |
| } |
| |
| static void |
| garrow_orc_file_reader_init(GArrowORCFileReader *object) |
| { |
| } |
| |
| static void |
| garrow_orc_file_reader_class_init(GArrowORCFileReaderClass *klass) |
| { |
| auto gobject_class = G_OBJECT_CLASS(klass); |
| |
| gobject_class->dispose = garrow_orc_file_reader_dispose; |
| gobject_class->finalize = garrow_orc_file_reader_finalize; |
| gobject_class->set_property = garrow_orc_file_reader_set_property; |
| gobject_class->get_property = garrow_orc_file_reader_get_property; |
| |
| GParamSpec *spec; |
| spec = g_param_spec_object("input", |
| "Input", |
| "The input stream", |
| GARROW_TYPE_SEEKABLE_INPUT_STREAM, |
| static_cast<GParamFlags>(G_PARAM_READWRITE | |
| G_PARAM_CONSTRUCT_ONLY)); |
| g_object_class_install_property(gobject_class, PROP_INPUT, spec); |
| |
| spec = g_param_spec_pointer("orc-file-reader", |
| "arrow::adapters::orc::ORCFileReader", |
| "The raw arrow::adapters::orc::ORCFileReader *", |
| static_cast<GParamFlags>(G_PARAM_WRITABLE | |
| G_PARAM_CONSTRUCT_ONLY)); |
| g_object_class_install_property(gobject_class, PROP_ORC_FILE_READER, spec); |
| } |
| |
| |
| /** |
| * garrow_orc_file_reader_new: |
| * @file: The file to be read. |
| * @error: (nullable): Return location for a #GError or %NULL. |
| * |
| * Returns: (nullable): A newly created #GArrowORCFileReader |
| * or %NULL on error. |
| * |
| * Since: 0.10.0 |
| */ |
| GArrowORCFileReader * |
| garrow_orc_file_reader_new(GArrowSeekableInputStream *input, |
| GError **error) |
| { |
| auto arrow_random_access_file = garrow_seekable_input_stream_get_raw(input); |
| auto pool = arrow::default_memory_pool(); |
| std::unique_ptr<arrow::adapters::orc::ORCFileReader> arrow_reader; |
| auto status = |
| arrow::adapters::orc::ORCFileReader::Open(arrow_random_access_file, |
| pool, |
| &arrow_reader); |
| if (garrow_error_check(error, status, "[orc-file-reader][new]")) { |
| return garrow_orc_file_reader_new_raw(input, arrow_reader.release()); |
| } else { |
| return NULL; |
| } |
| } |
| |
| /** |
| * garrow_orc_file_reader_set_field_indexes: |
| * @reader: A #GArrowORCFileReader. |
| * @field_indexes: (nullable) (array length=n_field_indexes): |
| * The field indexes to be read. |
| * @n_field_indexes: The number of the specified indexes. |
| * |
| * Since: 0.10.0 |
| * |
| * Deprecated: 0.12.0: |
| * Use garrow_orc_file_reader_set_field_indices() instead. |
| */ |
| void |
| garrow_orc_file_reader_set_field_indexes(GArrowORCFileReader *reader, |
| const gint *field_indexes, |
| guint n_field_indexes) |
| { |
| garrow_orc_file_reader_set_field_indices(reader, |
| field_indexes, |
| n_field_indexes); |
| } |
| |
| /** |
| * garrow_orc_file_reader_set_field_indices: |
| * @reader: A #GArrowORCFileReader. |
| * @field_indices: (nullable) (array length=n_field_indices): |
| * The field indices to be read. |
| * @n_field_indices: The number of the specified indices. |
| * |
| * Since: 0.12.0 |
| */ |
| void |
| garrow_orc_file_reader_set_field_indices(GArrowORCFileReader *reader, |
| const gint *field_indices, |
| guint n_field_indices) |
| { |
| auto priv = GARROW_ORC_FILE_READER_GET_PRIVATE(reader); |
| if (priv->field_indices) { |
| g_array_free(priv->field_indices, TRUE); |
| } |
| if (n_field_indices == 0) { |
| priv->field_indices = NULL; |
| } else { |
| priv->field_indices = g_array_sized_new(FALSE, |
| FALSE, |
| sizeof(gint), |
| n_field_indices); |
| g_array_append_vals(priv->field_indices, field_indices, n_field_indices); |
| } |
| } |
| |
| /** |
| * garrow_orc_file_reader_get_field_indexes: |
| * @reader: A #GArrowORCFileReader. |
| * @n_field_indexes: The number of the specified indexes. |
| * |
| * Returns: (nullable) (array length=n_field_indexes) (transfer none): |
| * The field indexes to be read. |
| * |
| * Since: 0.10.0 |
| * |
| * Deprecated: 0.12.0: |
| * Use garrow_orc_file_reader_get_field_indices() instead. |
| */ |
| const gint * |
| garrow_orc_file_reader_get_field_indexes(GArrowORCFileReader *reader, |
| guint *n_field_indexes) |
| { |
| return garrow_orc_file_reader_get_field_indices(reader, n_field_indexes); |
| } |
| |
| /** |
| * garrow_orc_file_reader_get_field_indices: |
| * @reader: A #GArrowORCFileReader. |
| * @n_field_indices: The number of the specified indices. |
| * |
| * Returns: (nullable) (array length=n_field_indices) (transfer none): |
| * The field indices to be read. |
| * |
| * Since: 0.12.0 |
| */ |
| const gint * |
| garrow_orc_file_reader_get_field_indices(GArrowORCFileReader *reader, |
| guint *n_field_indices) |
| { |
| auto priv = GARROW_ORC_FILE_READER_GET_PRIVATE(reader); |
| if (priv->field_indices) { |
| *n_field_indices = priv->field_indices->len; |
| return reinterpret_cast<gint *>(priv->field_indices->data); |
| } else { |
| *n_field_indices = 0; |
| return NULL; |
| } |
| } |
| |
| /** |
| * garrow_orc_file_reader_read_type: |
| * @reader: A #GArrowORCFileReader. |
| * @error: (nullable): Return location for a #GError or %NULL. |
| * |
| * Returns: (nullable) (transfer full): A newly read type as |
| * #GArrowSchema or %NULL on error. |
| * |
| * Since: 0.10.0 |
| */ |
| GArrowSchema * |
| garrow_orc_file_reader_read_type(GArrowORCFileReader *reader, |
| GError **error) |
| { |
| auto arrow_reader = garrow_orc_file_reader_get_raw(reader); |
| std::shared_ptr<arrow::Schema> arrow_schema; |
| auto status = arrow_reader->ReadSchema(&arrow_schema); |
| if (garrow_error_check(error, status, "[orc-file-reader][read-type]")) { |
| return garrow_schema_new_raw(&arrow_schema); |
| } else { |
| return NULL; |
| } |
| } |
| |
| /** |
| * garrow_orc_file_reader_read_stripes: |
| * @reader: A #GArrowORCFileReader. |
| * @error: (nullable): Return location for a #GError or %NULL. |
| * |
| * Returns: (nullable) (transfer full): A newly read stripes as |
| * #GArrowTable or %NULL on error. |
| * |
| * Since: 0.10.0 |
| */ |
| GArrowTable * |
| garrow_orc_file_reader_read_stripes(GArrowORCFileReader *reader, |
| GError **error) |
| { |
| auto arrow_reader = garrow_orc_file_reader_get_raw(reader); |
| auto priv = GARROW_ORC_FILE_READER_GET_PRIVATE(reader); |
| if (priv->field_indices) { |
| std::vector<int> arrow_field_indices; |
| auto field_indices = priv->field_indices; |
| for (guint i = 0; i < field_indices->len; ++i) { |
| arrow_field_indices.push_back(g_array_index(field_indices, gint, i)); |
| } |
| std::shared_ptr<arrow::Table> arrow_table; |
| auto status = arrow_reader->Read(arrow_field_indices, &arrow_table); |
| if (garrow_error_check(error, status, "[orc-file-reader][read-stripes]")) { |
| return garrow_table_new_raw(&arrow_table); |
| } else { |
| return NULL; |
| } |
| } else { |
| std::shared_ptr<arrow::Table> arrow_table; |
| auto status = arrow_reader->Read(&arrow_table); |
| if (garrow_error_check(error, status, "[orc-file-reader][read-stripes]")) { |
| return garrow_table_new_raw(&arrow_table); |
| } else { |
| return NULL; |
| } |
| } |
| } |
| |
| /** |
| * garrow_orc_file_reader_read_stripe: |
| * @reader: A #GArrowORCFileReader. |
| * @i: The stripe index to be read. |
| * @error: (nullable): Return location for a #GError or %NULL. |
| * |
| * Returns: (nullable) (transfer full): A newly read stripe as |
| * #GArrowRecordBatch or %NULL on error. |
| * |
| * Since: 0.10.0 |
| */ |
| GArrowRecordBatch * |
| garrow_orc_file_reader_read_stripe(GArrowORCFileReader *reader, |
| gint64 i, |
| GError **error) |
| { |
| auto arrow_reader = garrow_orc_file_reader_get_raw(reader); |
| if (i < 0) { |
| i += arrow_reader->NumberOfStripes(); |
| } |
| auto priv = GARROW_ORC_FILE_READER_GET_PRIVATE(reader); |
| if (priv->field_indices) { |
| std::vector<int> arrow_field_indices; |
| auto field_indices = priv->field_indices; |
| for (guint j = 0; j < field_indices->len; ++j) { |
| arrow_field_indices.push_back(g_array_index(field_indices, gint, j)); |
| } |
| std::shared_ptr<arrow::RecordBatch> arrow_record_batch; |
| auto status = arrow_reader->ReadStripe(i, |
| arrow_field_indices, |
| &arrow_record_batch); |
| if (garrow_error_check(error, status, "[orc-file-reader][read-stripe]")) { |
| return garrow_record_batch_new_raw(&arrow_record_batch); |
| } else { |
| return NULL; |
| } |
| } else { |
| std::shared_ptr<arrow::RecordBatch> arrow_record_batch; |
| auto status = arrow_reader->ReadStripe(i, &arrow_record_batch); |
| if (garrow_error_check(error, status, "[orc-file-reader][read-stripe]")) { |
| return garrow_record_batch_new_raw(&arrow_record_batch); |
| } else { |
| return NULL; |
| } |
| } |
| } |
| |
| /** |
| * garrow_orc_file_reader_get_n_stripes: |
| * @reader: A #GArrowORCFileReader. |
| * |
| * Returns: The number of stripes in the file. |
| * |
| * Since: 0.10.0 |
| */ |
| gint64 |
| garrow_orc_file_reader_get_n_stripes(GArrowORCFileReader *reader) |
| { |
| auto arrow_reader = garrow_orc_file_reader_get_raw(reader); |
| return arrow_reader->NumberOfStripes(); |
| } |
| |
| /** |
| * garrow_orc_file_reader_get_n_rows: |
| * @reader: A #GArrowORCFileReader. |
| * |
| * Returns: The number of rows in the file. |
| * |
| * Since: 0.10.0 |
| */ |
| gint64 |
| garrow_orc_file_reader_get_n_rows(GArrowORCFileReader *reader) |
| { |
| auto arrow_reader = garrow_orc_file_reader_get_raw(reader); |
| return arrow_reader->NumberOfRows(); |
| } |
| |
| |
| G_END_DECLS |
| |
| |
| GArrowORCFileReader * |
| garrow_orc_file_reader_new_raw(GArrowSeekableInputStream *input, |
| arrow::adapters::orc::ORCFileReader *arrow_reader) |
| { |
| auto reader = |
| GARROW_ORC_FILE_READER(g_object_new(GARROW_TYPE_ORC_FILE_READER, |
| "input", input, |
| "orc-file-reader", arrow_reader, |
| NULL)); |
| return reader; |
| } |
| |
| arrow::adapters::orc::ORCFileReader * |
| garrow_orc_file_reader_get_raw(GArrowORCFileReader *reader) |
| { |
| auto priv = GARROW_ORC_FILE_READER_GET_PRIVATE(reader); |
| return priv->orc_file_reader; |
| } |