| // Licensed to the Apache Software Foundation (ASF) under one |
| // or more contributor license agreements. See the NOTICE file |
| // distributed with this work for additional information |
| // regarding copyright ownership. The ASF licenses this file |
| // to you under the Apache License, Version 2.0 (the |
| // "License"); you may not use this file except in compliance |
| // with the License. You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, |
| // software distributed under the License is distributed on an |
| // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| // KIND, either express or implied. See the License for the |
| // specific language governing permissions and limitations |
| // under the License. |
| |
| #pragma once |
| #include <memory> |
| #include <vector> |
| |
| #include "vec/exec/format/orc/vorc_reader.h" |
| #include "vec/exec/format/parquet/vparquet_reader.h" |
| #include "vec/exec/format/table/table_format_reader.h" |
| namespace doris::vectorized { |
| #include "common/compile_check_begin.h" |
| |
| // By holding a parquet/orc reader, used to read the parquet/orc table of hive. |
| class HiveReader : public TableFormatReader, public TableSchemaChangeHelper { |
| public: |
| HiveReader(std::unique_ptr<GenericReader> file_format_reader, RuntimeProfile* profile, |
| RuntimeState* state, const TFileScanRangeParams& params, const TFileRangeDesc& range, |
| io::IOContext* io_ctx, const std::set<TSlotId>* is_file_slot, |
| FileMetaCache* meta_cache) |
| : TableFormatReader(std::move(file_format_reader), state, profile, params, range, |
| io_ctx, meta_cache), |
| _is_file_slot(is_file_slot) {}; |
| |
| ~HiveReader() override = default; |
| |
| Status get_next_block_inner(Block* block, size_t* read_rows, bool* eof) final; |
| |
| Status init_row_filters() final { return Status::OK(); }; |
| |
| protected: |
| // https://github.com/apache/doris/pull/23369 |
| const std::set<TSlotId>* _is_file_slot = nullptr; |
| }; |
| |
| class HiveOrcReader final : public HiveReader { |
| public: |
| ENABLE_FACTORY_CREATOR(HiveOrcReader); |
| HiveOrcReader(std::unique_ptr<GenericReader> file_format_reader, RuntimeProfile* profile, |
| RuntimeState* state, const TFileScanRangeParams& params, |
| const TFileRangeDesc& range, io::IOContext* io_ctx, |
| const std::set<TSlotId>* is_file_slot, FileMetaCache* meta_cache) |
| : HiveReader(std::move(file_format_reader), profile, state, params, range, io_ctx, |
| is_file_slot, meta_cache) {}; |
| ~HiveOrcReader() final = default; |
| |
| Status init_reader( |
| const std::vector<std::string>& read_table_col_names, |
| std::unordered_map<std::string, uint32_t>* col_name_to_block_idx, |
| const VExprContextSPtrs& conjuncts, const TupleDescriptor* tuple_descriptor, |
| const RowDescriptor* row_descriptor, |
| const VExprContextSPtrs* not_single_slot_filter_conjuncts, |
| const std::unordered_map<int, VExprContextSPtrs>* slot_id_to_filter_conjuncts); |
| |
| private: |
| static ColumnIdResult _create_column_ids(const orc::Type* orc_type, |
| const TupleDescriptor* tuple_descriptor); |
| |
| static ColumnIdResult _create_column_ids_by_top_level_col_index( |
| const orc::Type* orc_type, const TupleDescriptor* tuple_descriptor); |
| }; |
| |
| class HiveParquetReader final : public HiveReader { |
| public: |
| ENABLE_FACTORY_CREATOR(HiveParquetReader); |
| HiveParquetReader(std::unique_ptr<GenericReader> file_format_reader, RuntimeProfile* profile, |
| RuntimeState* state, const TFileScanRangeParams& params, |
| const TFileRangeDesc& range, io::IOContext* io_ctx, |
| const std::set<TSlotId>* is_file_slot, FileMetaCache* meta_cache) |
| : HiveReader(std::move(file_format_reader), profile, state, params, range, io_ctx, |
| is_file_slot, meta_cache) {}; |
| ~HiveParquetReader() final = default; |
| |
| Status init_reader( |
| const std::vector<std::string>& read_table_col_names, |
| std::unordered_map<std::string, uint32_t>* col_name_to_block_idx, |
| const VExprContextSPtrs& conjuncts, |
| phmap::flat_hash_map<int, std::vector<std::shared_ptr<ColumnPredicate>>>& |
| slot_id_to_predicates, |
| const TupleDescriptor* tuple_descriptor, const RowDescriptor* row_descriptor, |
| const std::unordered_map<std::string, int>* colname_to_slot_id, |
| const VExprContextSPtrs* not_single_slot_filter_conjuncts, |
| const std::unordered_map<int, VExprContextSPtrs>* slot_id_to_filter_conjuncts); |
| |
| private: |
| static ColumnIdResult _create_column_ids(const FieldDescriptor* field_desc, |
| const TupleDescriptor* tuple_descriptor); |
| |
| static ColumnIdResult _create_column_ids_by_top_level_col_index( |
| const FieldDescriptor* field_desc, const TupleDescriptor* tuple_descriptor); |
| }; |
| #include "common/compile_check_end.h" |
| } // namespace doris::vectorized |