blob: b762fce576377b4a7fec4b96b534ef44766005d5 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#include <Columns/IColumn.h>
#include <IO/PeekableReadBuffer.h>
#include <IO/ReadBuffer.h>
#include <Processors/Formats/IRowInputFormat.h>
#include <Processors/Formats/Impl/CSVRowInputFormat.h>
#include <Storages/SubstraitSource/FormatFile.h>
namespace local_engine
{
/// FormatFile implementation for text files exported from Excel.
///
/// Only the trivial pieces are defined inline here; `useThis`, `createInputFormat`
/// and `createFormatSettings` are defined out of line.
class ExcelTextFormatFile : public FormatFile
{
public:
    /// Decides, based on `context`, whether this format file applies.
    /// NOTE(review): presumably driven by the USE_EXCEL_PARSER key below — confirm in the definition.
    static bool useThis(const DB::ContextPtr & context);

    /// Passes all three arguments straight through to the FormatFile base class.
    explicit ExcelTextFormatFile(
        DB::ContextPtr context_, const substrait::ReadRel::LocalFiles::FileOrFiles & file_info_, ReadBufferBuilderPtr read_buffer_builder_)
        : FormatFile(context_, file_info_, read_buffer_builder_)
    {
    }

    ~ExcelTextFormatFile() override = default;

    /// Builds the input format for `header`; `filter_actions_dag` is optional and defaults to nullptr.
    FormatFile::InputFormatPtr
    createInputFormat(const DB::Block & header, const std::shared_ptr<const DB::ActionsDAG> & filter_actions_dag = nullptr) override;

    /// This format always reports that it supports splitting.
    bool supportSplit() const override { return true; }

    /// Name under which this format identifies itself.
    String getFileFormat() const override { return "ExcelText"; }

private:
    // Key that turns the excel text parser on.
    static constexpr std::string_view USE_EXCEL_PARSER = "use_excel_serialization";
    // Sub-keys of the excel parser; their exact effect is decided where they are read (not in this header).
    static constexpr std::string_view EXCEL_EMPTY_AS_NULL = "use_excel_serialization.empty_as_null";
    static constexpr std::string_view EXCEL_NUMBER_FORCE = "use_excel_serialization.number_force";
    static constexpr std::string_view EXCEL_QUOTE_STRICT = "use_excel_serialization.quote_strict";

    /// Produces the DB::FormatSettings used by this format (defined out of line).
    DB::FormatSettings createFormatSettings() const;
};
/// Row input format for the excel text parser, derived from DB::CSVRowInputFormat.
class ExcelRowInputFormat final : public DB::CSVRowInputFormat
{
public:
    /// Defined out of line.
    /// NOTE(review): `buf_` and `input_field_names_` are taken by non-const reference;
    /// whether they are modified or retained is decided in the definition — confirm there.
    ExcelRowInputFormat(
        const DB::SharedHeader & header_,
        std::shared_ptr<DB::PeekableReadBuffer> & buf_,
        const DB::RowInputFormatParams & params_,
        const DB::FormatSettings & format_settings_,
        DB::Names & input_field_names_,
        String escape_);

    /// Name under which this input format identifies itself.
    String getName() const override { return "ExcelRowInputFormat"; }

private:
    // Escape string kept by the format; presumably initialised from `escape_` — confirm in the constructor.
    String escape;
};
/// Field-level reader for the excel text parser, derived from DB::CSVFormatReader.
///
/// Everything except the `skipField(size_t)` forwarder is defined out of line.
class ExcelTextFormatReader final : public DB::CSVFormatReader
{
public:
    ExcelTextFormatReader(
        DB::PeekableReadBuffer & buf_, DB::Names & input_field_names_, String escape_, const DB::FormatSettings & format_settings_);

    // Header handling.
    std::vector<String> readNames() override;
    std::vector<String> readTypes() override;

    // Delimiter handling.
    void skipFieldDelimiter() override;
    void skipRowEndDelimiter() override;

    /// Reads one field into `column`.
    /// NOTE(review): the meaning of the returned bool comes from the base-class contract — confirm there.
    bool readField(
        DB::IColumn & column,
        const DB::DataTypePtr & type,
        const DB::SerializationPtr & serialization,
        bool is_last_file_column,
        const String & column_name) override;

    /// Skips one field without storing it.
    void skipField();

    /// Index-taking overload required by the base interface; the index is ignored
    /// and the call is forwarded to the parameterless skipField().
    void skipField(size_t /*file_column*/) override
    {
        skipField();
    }

private:
    void preSkipNullValue();
    bool isEndOfLine();

    // Stateless helpers that operate on the buffer passed to them.
    static void skipEndOfLine(DB::ReadBuffer & readBuffer);
    static void skipWhitespacesAndTabs(DB::ReadBuffer & readBuffer, bool allow_whitespace_or_tab_as_delimiter);

    // Reader's own copy of field names; presumably filled from `input_field_names_` — confirm in the constructor.
    std::vector<String> input_field_names;
    // Escape string; presumably filled from `escape_` — confirm in the constructor.
    String escape;
};
}