// blob: bc1635e0ccb9ae0fced1dd19ae8ae509222ab971 [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <cstddef>
#include <cstdint>
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <vector>
#include "common/status.h"
#include "gen_cpp/PlanNodes_types.h"
#include "io/fs/file_reader_writer_fwd.h"
#include "vec/exec/format/generic_reader.h"
// Forward declarations for types that this header only uses through pointers
// or references, so their full definitions do not have to be included here.
namespace doris {
// Named by reference in NativeReader::_init_schema_from_pblock(); declared
// here so the header does not depend on a transitive include to see the name.
class PBlock;
class RuntimeProfile;
class RuntimeState;
namespace io {
struct IOContext;
} // namespace io
} // namespace doris
namespace doris::vectorized {
class Block;
#include "common/compile_check_begin.h"
// Doris Native format reader.
// It reads a sequence of Blocks encoded in the Doris Native binary format.
//
// NOTE: the current implementation is just a skeleton and will be filled in step by step.
class NativeReader : public GenericReader {
public:
    ENABLE_FACTORY_CREATOR(NativeReader);

    // NOTE(review): `_scan_params` and `_scan_range` are reference members, so
    // `params` and `range` presumably have to outlive this reader -- confirm
    // against the constructor definition.
    NativeReader(RuntimeProfile* profile, const TFileScanRangeParams& params,
                 const TFileRangeDesc& range, io::IOContext* io_ctx, RuntimeState* state);

    ~NativeReader() override;

    // Sets up the underlying file reader and any format-specific state.
    Status init_reader();

    // GenericReader interface; the definitions live outside this header.
    Status get_next_block(Block* block, size_t* read_rows, bool* eof) override;

    Status get_columns(std::unordered_map<std::string, DataTypePtr>* name_to_type,
                       std::unordered_set<std::string>* missing_cols) override;

    Status init_schema_reader() override;

    Status get_parsed_schema(std::vector<std::string>* col_names,
                             std::vector<DataTypePtr>* col_types) override;

    Status close() override;

    // Always reports true for this reader.
    bool count_read_rows() override { return true; }

protected:
    // No-op: this reader has nothing to collect here.
    void _collect_profile_before_close() override {}

private:
    // Internal helpers, defined outside this header.
    // NOTE(review): presumably fetches the next serialized PBlock into `buff`
    // and reports end-of-file through `eof` -- confirm against the definition.
    Status _read_next_pblock(std::string* buff, bool* eof);

    // NOTE(review): presumably fills the cached schema members below from
    // `pblock` -- confirm against the definition.
    Status _init_schema_from_pblock(const PBlock& pblock);

    // Data members. Declaration order is kept stable on purpose: it fixes the
    // member initialization order used by the constructor.
    RuntimeProfile* _profile {nullptr};
    const TFileScanRangeParams& _scan_params;
    const TFileRangeDesc& _scan_range;
    io::FileReaderSPtr _file_reader;
    io::IOContext* _io_ctx {nullptr};
    RuntimeState* _state {nullptr};
    bool _eof {false};

    // Read position within the underlying file.
    int64_t _current_offset {0};
    int64_t _file_size {0};

    // Schema information cached from the first PBlock.
    bool _schema_inited {false};
    std::vector<std::string> _schema_col_names;
    std::vector<DataTypePtr> _schema_col_types;

    // The first block, kept in serialized form, so the schema can be probed
    // before the data scan starts.
    std::string _first_block_buf;
    bool _first_block_loaded {false};
    bool _first_block_consumed {false};
};
#include "common/compile_check_end.h"
} // namespace doris::vectorized