blob: 05ec9018466ef0c3e4ce0e4eb8ac7ab571600ce1 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#include <string>
#include <Core/Block.h>
#include <Core/ColumnsWithTypeAndName.h>
#include <Core/NamesAndTypes.h>
#include <Formats/FormatSettings.h>
#include <Interpreters/ActionsDAG.h>
#include <boost/algorithm/string/replace.hpp>
#include <parquet/schema.h>
#include <tests/testConfig.h>
#include <Common/BlockTypeUtils.h>
/// Forward declaration only: this header names substrait::Plan in function
/// declarations below without including its full definition.
namespace substrait
{
class Plan;
}
/// Forward declaration only: LocalExecutor is referenced through
/// std::unique_ptr in the declarations below.
namespace local_engine
{
class LocalExecutor;
}
/// Short aliases for the ClickHouse (DB::) types used by the helpers in this header.
/// BlockRowType  - a collection of columns, each carrying its type and name.
using BlockRowType = DB::ColumnsWithTypeAndName;
/// BlockFieldType - a single column together with its type and name.
using BlockFieldType = DB::ColumnWithTypeAndName;
/// FieldType - a (name, type) pair without column data.
using FieldType = DB::NameAndTypePair;
/// Forward declaration of the parquet column descriptor taken by
/// toDataType() and toNameTypePair() below.
namespace parquet
{
class ColumnDescriptor;
}
/// Forward declarations of the DB:: types referenced by the declarations below.
/// NOTE(review): <Formats/FormatSettings.h> is already included at the top of this
/// header, so the FormatSettings forward declaration looks redundant - confirm.
namespace DB
{
struct FormatSettings;
class ReadBuffer;
class Field;
}
/// Forward declaration of the Arrow file interface returned (via shared_ptr)
/// by asArrowFileForParquet() below.
namespace arrow::io
{
class RandomAccessFile;
}
/// Test helper declarations. Only the signatures are visible in this header;
/// the definitions live elsewhere.
namespace local_engine::test
{
/// The three helpers below each turn a file name into a std::string.
/// NOTE(review): presumably a path in the third-party data directory, a path in the
/// gtest data directory, and a URI for the latter - confirm against the definitions.
std::string third_party_data(const char * file);
std::string gtest_data(const char * file);
std::string gtest_uri(const char * file);
/// Produces an Arrow RandomAccessFile from a DB::ReadBuffer using the given format settings.
std::shared_ptr<arrow::io::RandomAccessFile> asArrowFileForParquet(DB::ReadBuffer & in, const DB::FormatSettings & settings);
/// Maps a parquet column descriptor to a DB data type.
DB::DataTypePtr toDataType(const parquet::ColumnDescriptor & type);
/// Returns a RowType for the parquet file `file`; `settings` defaults to a
/// default-constructed DB::FormatSettings.
RowType readParquetSchema(const std::string & file, const DB::FormatSettings & settings = DB::FormatSettings{});
/// Builds an ActionsDAG from the textual `filter` using the given names and types.
/// The result is optional, so callers must handle the empty case.
std::optional<DB::ActionsDAG> parseFilter(const std::string & filter, const RowType & name_and_types);
/// Creates a substrait plan plus the LocalExecutor for it from a JSON plan and a split.
/// `context` is optional and defaults to std::nullopt.
std::pair<substrait::Plan, std::unique_ptr<LocalExecutor>>
create_plan_and_executor(std::string_view json_plan, std::string_view split, const std::optional<DB::ContextPtr> & context = std::nullopt);
/// Overload taking a split template and a file.
/// NOTE(review): presumably `file` is substituted into `split_template` before the
/// overload above is used - confirm against the definition.
std::pair<substrait::Plan, std::unique_ptr<LocalExecutor>> create_plan_and_executor(
std::string_view json_plan,
std::string_view split_template,
std::string_view file,
const std::optional<DB::ContextPtr> & context = std::nullopt);
}
/// Ordered, string-keyed map of DB::Field values.
/// NOTE(review): presumably setting name -> setting value - confirm at the use sites.
using TestSettings = std::map<std::string, DB::Field>;
/// Returns a copy of `haystack` in which every occurrence of the literal
/// placeholder `{replace_local_files}` has been replaced by `replaced`.
/// `haystack` itself is not modified.
inline std::string replaceLocalFilesWildcards(const std::string_view haystack, const std::string_view replaced)
{
    static constexpr auto placeholder = "{replace_local_files}";

    // Work on an owned copy so the substitution can be done in place.
    std::string result{haystack};
    boost::replace_all(result, placeholder, replaced);
    return result;
}
/// Returns a copy of `haystack` in which every occurrence of the literal
/// placeholder `{replace_GLUTEN_SOURCE_TPCH_DIR}` has been replaced by the
/// compile-time value of `GLUTEN_SOURCE_TPCH_URI("")`.
inline std::string replaceLocalFilesWithTPCH(const std::string_view haystack)
{
    static constexpr auto placeholder = "{replace_GLUTEN_SOURCE_TPCH_DIR}";
    constexpr std::string_view tpch_uri = GLUTEN_SOURCE_TPCH_URI("");

    // Work on an owned copy so the substitution can be done in place.
    std::string result{haystack};
    boost::replace_all(result, placeholder, tpch_uri);
    return result;
}
/// Builds a (name, type) pair for a parquet column: the name is taken from the
/// descriptor and the type comes from local_engine::test::toDataType().
inline FieldType toNameTypePair(const parquet::ColumnDescriptor & type)
{
    const auto & column_name = type.name();
    const auto data_type = local_engine::test::toDataType(type);
    return FieldType(column_name, data_type);
}
/// Converts a block header into a row type: one (name, type) entry per column,
/// in the header's column order.
///
/// Uses the block's own accessor rather than copying every name and looking
/// each one up again by name. That avoids the redundant lookups, removes the
/// unchecked pointer dereference, and reports each column's own type even if
/// two columns happen to share a name.
inline local_engine::RowType toRowType(const DB::Block & header)
{
    return header.getNamesAndTypesList();
}
/// Builds a row type by pairing `input[i]` (column name) with `type[i]`
/// (column data type) for every index of `input`.
///
/// @param input  column names; determines the number of entries produced.
/// @param type   column data types; must hold at least `input.size()` elements.
/// @return       the resulting list of (name, type) pairs, in `input` order.
/// @throws std::out_of_range if `type` has fewer elements than `input`.
///         Extra elements in `type` are ignored.
inline local_engine::RowType ROW(std::vector<std::string> && input, std::vector<DB::DataTypePtr> && type)
{
    DB::NamesAndTypesList result;
    for (size_t i = 0; i < input.size(); ++i)
        // at() is bounds-checked: a too-short `type` fails loudly instead of
        // reading past the end of the vector.
        result.emplace_back(input[i], type.at(i));
    return result;
}
/// Converts the entries of `type` accepted by `predicate` into columns, using
/// local_engine::toColumnType() for each accepted entry. The relative order
/// of the accepted entries is preserved.
///
/// @param type       the (name, type) entries to filter and convert.
/// @param predicate  callable invoked once per entry; the entry is kept when
///                   the call yields true.
template <class Predicate>
BlockRowType toBlockRowType(const local_engine::RowType & type, Predicate predicate)
{
    BlockRowType columns;
    // Upper bound: at most one output column per input entry.
    columns.reserve(type.size());
    for (const auto & name_and_type : type)
    {
        if (!predicate(name_and_type))
            continue;
        columns.emplace_back(local_engine::toColumnType(name_and_type));
    }
    return columns;
}
/// Wraps the bytes of `s` in a parquet::ByteArray, passing the string's length
/// (cast to uint32_t) and a pointer to its character data.
/// NOTE(review): the result is built from `s.data()` directly, so it presumably
/// refers to `s`'s buffer rather than copying it - keep `s` alive while in use.
inline parquet::ByteArray ByteArrayFromString(const std::string & s)
{
    const auto length = static_cast<uint32_t>(s.size());
    const auto * const bytes = reinterpret_cast<const uint8_t *>(s.data());
    return parquet::ByteArray(length, bytes);
}
/// Expands to a std::string_view over the bytes at `g<res>Data` with length
/// `g<res>Size`. Both identifiers are formed by token pasting and must be
/// visible at the point of expansion.
#define EMBEDDED_PLAN(res) std::string_view{reinterpret_cast<const char *>(g##res##Data), g##res##Size}