blob: fa359373114b5db2202cb59b0aebf0e548d50e4b [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
// This file is copied from
// https://github.com/apache/impala/blob/branch-2.9.0/be/src/runtime/descriptors.h
// and modified by Doris
#pragma once
#include <gen_cpp/Descriptors_types.h>
#include <gen_cpp/Types_types.h>
#include <glog/logging.h>
#include <google/protobuf/stubs/port.h>
#include <stdint.h>
#include <ostream>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
#include "common/be_mock_util.h"
#include "common/compiler_util.h" // IWYU pragma: keep
#include "common/global_types.h"
#include "common/object_pool.h"
#include "common/status.h"
#include "olap/utils.h"
#include "runtime/define_primitive_type.h"
#include "runtime/types.h"
#include "vec/data_types/data_type.h"
namespace google::protobuf {
template <typename Element>
class RepeatedField;
} // namespace google::protobuf
namespace doris {
class ObjectPool;
class PTupleDescriptor;
class PSlotDescriptor;
class SlotDescriptor {
public:
MOCK_DEFINE(virtual ~SlotDescriptor() = default;)
SlotId id() const { return _id; }
const TypeDescriptor& type() const { return _type; }
TupleId parent() const { return _parent; }
// Returns the column index of this slot, including partition keys.
// (e.g., col_pos - num_partition_keys = the table column this slot corresponds to)
int col_pos() const { return _col_pos; }
// Returns the field index in the generated llvm struct for this slot's tuple
int field_idx() const { return _field_idx; }
bool is_materialized() const { return _is_materialized; }
bool is_nullable() const { return _is_nullable; }
const std::string& col_name() const { return _col_name; }
const std::string& col_name_lower_case() const { return _col_name_lower_case; }
void to_protobuf(PSlotDescriptor* pslot) const;
std::string debug_string() const;
vectorized::MutableColumnPtr get_empty_mutable_column() const;
MOCK_FUNCTION doris::vectorized::DataTypePtr get_data_type_ptr() const;
int32_t col_unique_id() const { return _col_unique_id; }
bool is_key() const { return _is_key; }
const std::vector<std::string>& column_paths() const { return _column_paths; };
bool is_auto_increment() const { return _is_auto_increment; }
bool is_skip_bitmap_col() const { return _col_name == SKIP_BITMAP_COL; }
bool is_sequence_col() const { return _col_name == SEQUENCE_COL; }
const std::string& col_default_value() const { return _col_default_value; }
PrimitiveType col_type() const { return _type.type; }
private:
friend class DescriptorTbl;
friend class TupleDescriptor;
friend class SchemaScanner;
friend class OlapTableSchemaParam;
friend class PInternalServiceImpl;
friend class RowIdStorageReader;
friend class Tablet;
friend class TabletSchema;
const SlotId _id;
const TypeDescriptor _type;
const TupleId _parent;
const int _col_pos;
bool _is_nullable;
const std::string _col_name;
const std::string _col_name_lower_case;
const int32_t _col_unique_id;
// the idx of the slot in the tuple descriptor (0-based).
// this is provided by the FE
const int _slot_idx;
// the idx of the slot in the llvm codegen'd tuple struct
// this is set by TupleDescriptor during codegen and takes into account
// leading null bytes.
int _field_idx;
const bool _is_materialized;
const bool _is_key;
const std::vector<std::string> _column_paths;
const bool _is_auto_increment;
const std::string _col_default_value;
SlotDescriptor(const TSlotDescriptor& tdesc);
SlotDescriptor(const PSlotDescriptor& pdesc);
MOCK_DEFINE(SlotDescriptor();)
};
// Base class for table descriptors.
class TableDescriptor {
public:
TableDescriptor(const TTableDescriptor& tdesc);
virtual ~TableDescriptor() = default;
int num_cols() const { return _num_cols; }
int num_clustering_cols() const { return _num_clustering_cols; }
virtual std::string debug_string() const;
// The first _num_clustering_cols columns by position are clustering
// columns.
bool is_clustering_col(const SlotDescriptor* slot_desc) const {
return slot_desc->col_pos() < _num_clustering_cols;
}
::doris::TTableType::type table_type() const { return _table_type; }
const std::string& name() const { return _name; }
const std::string& database() const { return _database; }
int64_t table_id() const { return _table_id; }
private:
::doris::TTableType::type _table_type;
std::string _name;
std::string _database;
int64_t _table_id;
int _num_cols;
int _num_clustering_cols;
};
class OlapTableDescriptor : public TableDescriptor {
public:
OlapTableDescriptor(const TTableDescriptor& tdesc);
std::string debug_string() const override;
};
class SchemaTableDescriptor : public TableDescriptor {
public:
SchemaTableDescriptor(const TTableDescriptor& tdesc);
~SchemaTableDescriptor() override;
std::string debug_string() const override;
TSchemaTableType::type schema_table_type() const { return _schema_table_type; }
private:
TSchemaTableType::type _schema_table_type;
};
class BrokerTableDescriptor : public TableDescriptor {
public:
BrokerTableDescriptor(const TTableDescriptor& tdesc);
~BrokerTableDescriptor() override;
std::string debug_string() const override;
private:
};
class HiveTableDescriptor : public TableDescriptor {
public:
HiveTableDescriptor(const TTableDescriptor& tdesc);
~HiveTableDescriptor() override;
std::string debug_string() const override;
private:
};
class IcebergTableDescriptor : public TableDescriptor {
public:
IcebergTableDescriptor(const TTableDescriptor& tdesc);
~IcebergTableDescriptor() override;
std::string debug_string() const override;
private:
};
class MaxComputeTableDescriptor : public TableDescriptor {
public:
MaxComputeTableDescriptor(const TTableDescriptor& tdesc);
~MaxComputeTableDescriptor() override;
std::string debug_string() const override;
std::string region() const { return _region; }
std::string project() const { return _project; }
std::string table() const { return _table; }
std::string odps_url() const { return _odps_url; }
std::string tunnel_url() const { return _tunnel_url; }
std::string access_key() const { return _access_key; }
std::string secret_key() const { return _secret_key; }
std::string public_access() const { return _public_access; }
std::string endpoint() const { return _endpoint; }
std::string quota() const { return _quota; }
Status init_status() const { return _init_status; }
private:
std::string _region; //deprecated
std::string _project;
std::string _table;
std::string _odps_url; //deprecated
std::string _tunnel_url; //deprecated
std::string _access_key;
std::string _secret_key;
std::string _public_access; //deprecated
std::string _endpoint;
std::string _quota;
Status _init_status = Status::OK();
};
class TrinoConnectorTableDescriptor : public TableDescriptor {
public:
TrinoConnectorTableDescriptor(const TTableDescriptor& tdesc);
~TrinoConnectorTableDescriptor() override;
std::string debug_string() const override;
private:
};
class EsTableDescriptor : public TableDescriptor {
public:
EsTableDescriptor(const TTableDescriptor& tdesc);
~EsTableDescriptor() override;
std::string debug_string() const override;
private:
};
class MySQLTableDescriptor : public TableDescriptor {
public:
MySQLTableDescriptor(const TTableDescriptor& tdesc);
std::string debug_string() const override;
std::string mysql_db() const { return _mysql_db; }
std::string mysql_table() const { return _mysql_table; }
std::string host() const { return _host; }
std::string port() const { return _port; }
std::string user() const { return _user; }
std::string passwd() const { return _passwd; }
std::string charset() const { return _charset; }
private:
std::string _mysql_db;
std::string _mysql_table;
std::string _host;
std::string _port;
std::string _user;
std::string _passwd;
std::string _charset;
};
class ODBCTableDescriptor : public TableDescriptor {
public:
ODBCTableDescriptor(const TTableDescriptor& tdesc);
std::string debug_string() const override;
std::string db() const { return _db; }
std::string table() const { return _table; }
std::string host() const { return _host; }
std::string port() const { return _port; }
std::string user() const { return _user; }
std::string passwd() const { return _passwd; }
std::string driver() const { return _driver; }
TOdbcTableType::type type() const { return _type; }
private:
std::string _db;
std::string _table;
std::string _host;
std::string _port;
std::string _user;
std::string _passwd;
std::string _driver;
TOdbcTableType::type _type;
};
class JdbcTableDescriptor : public TableDescriptor {
public:
JdbcTableDescriptor(const TTableDescriptor& tdesc);
std::string debug_string() const override;
int64_t jdbc_catalog_id() const { return _jdbc_catalog_id; }
const std::string& jdbc_resource_name() const { return _jdbc_resource_name; }
const std::string& jdbc_driver_url() const { return _jdbc_driver_url; }
const std::string& jdbc_driver_class() const { return _jdbc_driver_class; }
const std::string& jdbc_driver_checksum() const { return _jdbc_driver_checksum; }
const std::string& jdbc_url() const { return _jdbc_url; }
const std::string& jdbc_table_name() const { return _jdbc_table_name; }
const std::string& jdbc_user() const { return _jdbc_user; }
const std::string& jdbc_passwd() const { return _jdbc_passwd; }
int32_t connection_pool_min_size() const { return _connection_pool_min_size; }
int32_t connection_pool_max_size() const { return _connection_pool_max_size; }
int32_t connection_pool_max_wait_time() const { return _connection_pool_max_wait_time; }
int32_t connection_pool_max_life_time() const { return _connection_pool_max_life_time; }
bool connection_pool_keep_alive() const { return _connection_pool_keep_alive; }
private:
int64_t _jdbc_catalog_id;
std::string _jdbc_resource_name;
std::string _jdbc_driver_url;
std::string _jdbc_driver_class;
std::string _jdbc_driver_checksum;
std::string _jdbc_url;
std::string _jdbc_table_name;
std::string _jdbc_user;
std::string _jdbc_passwd;
int32_t _connection_pool_min_size;
int32_t _connection_pool_max_size;
int32_t _connection_pool_max_wait_time;
int32_t _connection_pool_max_life_time;
bool _connection_pool_keep_alive;
};
class TupleDescriptor {
public:
TupleDescriptor(TupleDescriptor&&) = delete;
void operator=(const TupleDescriptor&) = delete;
MOCK_DEFINE(virtual) ~TupleDescriptor() {
if (_own_slots) {
for (SlotDescriptor* slot : _slots) {
delete slot;
}
}
}
MOCK_DEFINE(TupleDescriptor() : _id {0} {};)
int num_materialized_slots() const { return _num_materialized_slots; }
MOCK_FUNCTION const std::vector<SlotDescriptor*>& slots() const { return _slots; }
bool has_varlen_slots() const { return _has_varlen_slots; }
const TableDescriptor* table_desc() const { return _table_desc; }
TupleId id() const { return _id; }
std::string debug_string() const;
void to_protobuf(PTupleDescriptor* ptuple) const;
private:
friend class DescriptorTbl;
friend class SchemaScanner;
friend class OlapTableSchemaParam;
friend class PInternalServiceImpl;
friend class RowIdStorageReader;
friend class TabletSchema;
const TupleId _id;
TableDescriptor* _table_desc = nullptr;
int _num_materialized_slots;
std::vector<SlotDescriptor*> _slots; // contains all slots
// Provide quick way to check if there are variable length slots.
// True if _string_slots or _collection_slots have entries.
bool _has_varlen_slots;
bool _own_slots = false;
TupleDescriptor(const TTupleDescriptor& tdesc, bool own_slot = false);
TupleDescriptor(const PTupleDescriptor& tdesc, bool own_slot = false);
void add_slot(SlotDescriptor* slot);
};
class DescriptorTbl {
public:
// Creates a descriptor tbl within 'pool' from thrift_tbl and returns it via 'tbl'.
// Returns OK on success, otherwise error (in which case 'tbl' will be unset).
static Status create(ObjectPool* pool, const TDescriptorTable& thrift_tbl, DescriptorTbl** tbl);
TableDescriptor* get_table_descriptor(TableId id) const;
TupleDescriptor* get_tuple_descriptor(TupleId id) const;
SlotDescriptor* get_slot_descriptor(SlotId id) const;
const std::vector<TTupleId>& get_row_tuples() const { return _row_tuples; }
// return all registered tuple descriptors
std::vector<TupleDescriptor*> get_tuple_descs() const {
std::vector<TupleDescriptor*> descs;
for (auto it : _tuple_desc_map) {
descs.push_back(it.second);
}
return descs;
}
std::string debug_string() const;
private:
using TableDescriptorMap = std::unordered_map<TableId, TableDescriptor*>;
using TupleDescriptorMap = std::unordered_map<TupleId, TupleDescriptor*>;
using SlotDescriptorMap = std::unordered_map<SlotId, SlotDescriptor*>;
TableDescriptorMap _tbl_desc_map;
TupleDescriptorMap _tuple_desc_map;
SlotDescriptorMap _slot_desc_map;
std::vector<TTupleId> _row_tuples;
DescriptorTbl() = default;
};
#define RETURN_IF_INVALID_TUPLE_IDX(tuple_id, tuple_idx) \
do { \
if (UNLIKELY(RowDescriptor::INVALID_IDX == tuple_idx)) { \
return Status::InternalError("failed to get tuple idx with tuple id: {}", tuple_id); \
} \
} while (false)
// Records positions of tuples within row produced by ExecNode.
// TODO: this needs to differentiate between tuples contained in row
// and tuples produced by ExecNode (parallel to PlanNode.rowTupleIds and
// PlanNode.tupleIds); right now, we conflate the two (and distinguish based on
// context; for instance, HdfsScanNode uses these tids to create row batches, ie, the
// first case, whereas TopNNode uses these tids to copy output rows, ie, the second
// case)
class RowDescriptor {
public:
RowDescriptor(const DescriptorTbl& desc_tbl, const std::vector<TTupleId>& row_tuples,
const std::vector<bool>& nullable_tuples);
// standard copy c'tor, made explicit here
RowDescriptor(const RowDescriptor& desc)
: _tuple_desc_map(desc._tuple_desc_map),
_tuple_idx_nullable_map(desc._tuple_idx_nullable_map),
_tuple_idx_map(desc._tuple_idx_map),
_has_varlen_slots(desc._has_varlen_slots) {
auto it = desc._tuple_desc_map.begin();
for (; it != desc._tuple_desc_map.end(); ++it) {
_num_materialized_slots += (*it)->num_materialized_slots();
_num_slots += (*it)->slots().size();
}
}
RowDescriptor(TupleDescriptor* tuple_desc, bool is_nullable);
RowDescriptor(const RowDescriptor& lhs_row_desc, const RowDescriptor& rhs_row_desc);
// dummy descriptor, needed for the JNI EvalPredicate() function
RowDescriptor() = default;
MOCK_DEFINE(virtual ~RowDescriptor() = default;)
int num_materialized_slots() const { return _num_materialized_slots; }
int num_slots() const { return _num_slots; }
static const int INVALID_IDX;
// Returns INVALID_IDX if id not part of this row.
int get_tuple_idx(TupleId id) const;
// Return true if any Tuple has variable length slots.
bool has_varlen_slots() const { return _has_varlen_slots; }
// Return descriptors for all tuples in this row, in order of appearance.
MOCK_FUNCTION const std::vector<TupleDescriptor*>& tuple_descriptors() const {
return _tuple_desc_map;
}
// Populate row_tuple_ids with our ids.
void to_thrift(std::vector<TTupleId>* row_tuple_ids);
void to_protobuf(google::protobuf::RepeatedField<google::protobuf::int32>* row_tuple_ids) const;
// Return true if the tuple ids of this descriptor are a prefix
// of the tuple ids of other_desc.
bool is_prefix_of(const RowDescriptor& other_desc) const;
// Return true if the tuple ids of this descriptor match tuple ids of other desc.
bool equals(const RowDescriptor& other_desc) const;
std::string debug_string() const;
int get_column_id(int slot_id, bool force_materialize_slot = false) const;
private:
// Initializes tupleIdxMap during c'tor using the _tuple_desc_map.
void init_tuple_idx_map();
// Initializes _has_varlen_slots during c'tor using the _tuple_desc_map.
void init_has_varlen_slots();
// map from position of tuple w/in row to its descriptor
std::vector<TupleDescriptor*> _tuple_desc_map;
// _tuple_idx_nullable_map[i] is true if tuple i can be null
std::vector<bool> _tuple_idx_nullable_map;
// map from TupleId to position of tuple w/in row
std::vector<int> _tuple_idx_map;
// Provide quick way to check if there are variable length slots.
bool _has_varlen_slots = false;
int _num_materialized_slots = 0;
int _num_slots = 0;
};
} // namespace doris