# blob: aecbba317aecd1b331261ca600058e30e0c4f184 [file] [log] [blame]
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# distutils: language = c++
# cython: language_level = 3
from libcpp cimport bool as c_bool
from libc.string cimport const_char
from libcpp.vector cimport vector as std_vector
from pyarrow.includes.common cimport *
from pyarrow.includes.libarrow cimport (CArray, CSchema, CStatus,
CResult, CTable, CMemoryPool,
CKeyValueMetadata,
CRecordBatch,
CTable, CCompressionType,
CRandomAccessFile, COutputStream,
TimeUnit)
# Declarations pulled from the header "arrow/adapters/orc/options.h".
# Everything in this block is declared ``nogil`` under the C++ namespace
# "arrow::adapters::orc".  Each quoted string is the explicit C++ name the
# Cython identifier is bound to; the strings (including their leading space)
# are reproduced verbatim.
cdef extern from "arrow/adapters/orc/options.h" namespace "arrow::adapters::orc" nogil:

    # Bound to the C++ enumerators kSpeed / kCompression.
    cdef enum CompressionStrategy" arrow::adapters::orc::CompressionStrategy":
        _CompressionStrategy_SPEED" arrow::adapters::orc::CompressionStrategy::kSpeed"
        _CompressionStrategy_COMPRESSION" arrow::adapters::orc::CompressionStrategy::kCompression"

    # Enumerators of arrow::adapters::orc::WriterId.
    cdef enum WriterId" arrow::adapters::orc::WriterId":
        _WriterId_ORC_JAVA_WRITER" arrow::adapters::orc::WriterId::kOrcJava"
        _WriterId_ORC_CPP_WRITER" arrow::adapters::orc::WriterId::kOrcCpp"
        _WriterId_PRESTO_WRITER" arrow::adapters::orc::WriterId::kPresto"
        _WriterId_SCRITCHLEY_GO" arrow::adapters::orc::WriterId::kScritchleyGo"
        _WriterId_TRINO_WRITER" arrow::adapters::orc::WriterId::kTrino"
        _WriterId_UNKNOWN_WRITER" arrow::adapters::orc::WriterId::kUnknown"

    # Enumerators of arrow::adapters::orc::WriterVersion.
    cdef enum WriterVersion" arrow::adapters::orc::WriterVersion":
        _WriterVersion_ORIGINAL" arrow::adapters::orc::WriterVersion::kOriginal"
        _WriterVersion_HIVE_8732" arrow::adapters::orc::WriterVersion::kHive8732"
        _WriterVersion_HIVE_4243" arrow::adapters::orc::WriterVersion::kHive4243"
        _WriterVersion_HIVE_12055" arrow::adapters::orc::WriterVersion::kHive12055"
        _WriterVersion_HIVE_13083" arrow::adapters::orc::WriterVersion::kHive13083"
        _WriterVersion_ORC_101" arrow::adapters::orc::WriterVersion::kOrc101"
        _WriterVersion_ORC_135" arrow::adapters::orc::WriterVersion::kOrc135"
        _WriterVersion_ORC_517" arrow::adapters::orc::WriterVersion::kOrc517"
        _WriterVersion_ORC_203" arrow::adapters::orc::WriterVersion::kOrc203"
        _WriterVersion_ORC_14" arrow::adapters::orc::WriterVersion::kOrc14"
        _WriterVersion_MAX" arrow::adapters::orc::WriterVersion::kMax"

    # C++ class built from a (major, minor) pair of uint32_t values, with
    # accessors for both parts and a string rendering.
    cdef cppclass FileVersion" arrow::adapters::orc::FileVersion":
        FileVersion(uint32_t major_version, uint32_t minor_version)
        uint32_t major_version()
        uint32_t minor_version()
        c_string ToString()

    # Fields of the C++ WriteOptions struct made visible to Cython.  A
    # reference to this struct is what ORCFileWriter.Open() accepts.
    cdef struct WriteOptions" arrow::adapters::orc::WriteOptions":
        int64_t batch_size
        FileVersion file_version
        int64_t stripe_size
        CCompressionType compression
        int64_t compression_block_size
        CompressionStrategy compression_strategy
        int64_t row_index_stride
        double padding_tolerance
        double dictionary_key_size_threshold
        std_vector[int64_t] bloom_filter_columns
        double bloom_filter_fpp
# Declarations pulled from the header "arrow/adapters/orc/adapter.h".
# Everything in this block is declared ``nogil`` under the C++ namespace
# "arrow::adapters::orc".
cdef extern from "arrow/adapters/orc/adapter.h" namespace "arrow::adapters::orc" nogil:

    cdef cppclass ORCFileReader:
        # Static factory: takes a random-access file and a memory pool and
        # yields a CResult wrapping a unique_ptr to the reader.
        @staticmethod
        CResult[unique_ptr[ORCFileReader]] Open(
            const shared_ptr[CRandomAccessFile]& file, CMemoryPool* pool)

        # Metadata / schema accessors, each wrapped in a CResult.
        CResult[shared_ptr[const CKeyValueMetadata]] ReadMetadata()
        CResult[shared_ptr[CSchema]] ReadSchema()

        # Stripe and whole-file reads.  Each has a no-extra-argument overload
        # and one taking a vector of strings.
        CResult[shared_ptr[CRecordBatch]] ReadStripe(int64_t stripe)
        CResult[shared_ptr[CRecordBatch]] ReadStripe(int64_t stripe, std_vector[c_string])
        CResult[shared_ptr[CTable]] Read()
        CResult[shared_ptr[CTable]] Read(std_vector[c_string])

        # Plain accessors returning values directly (only GetCompression is
        # wrapped in a CResult).
        int64_t NumberOfStripes()
        int64_t NumberOfRows()
        FileVersion GetFileVersion()
        c_string GetSoftwareVersion()
        CResult[CCompressionType] GetCompression()
        int64_t GetCompressionSize()
        int64_t GetRowIndexStride()
        WriterId GetWriterId()
        int32_t GetWriterIdValue()
        WriterVersion GetWriterVersion()
        int64_t GetNumberOfStripeStatistics()
        int64_t GetContentLength()
        int64_t GetStripeStatisticsLength()
        int64_t GetFileFooterLength()
        int64_t GetFilePostscriptLength()
        int64_t GetFileLength()
        c_string GetSerializedFileTail()

    cdef cppclass ORCFileWriter:
        # Static factory: takes a raw output-stream pointer plus a
        # WriteOptions reference and yields a CResult wrapping a unique_ptr
        # to the writer.
        @staticmethod
        CResult[unique_ptr[ORCFileWriter]] Open(
            COutputStream* output_stream,
            const WriteOptions& writer_options)

        # Both operations report their outcome through a CStatus.
        CStatus Write(const CTable& table)
        CStatus Close()