// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
namespace cpp doris
namespace java org.apache.doris.thrift
include "Exprs.thrift"
include "Types.thrift"
include "Opcodes.thrift"
include "Partitions.thrift"
include "Descriptors.thrift"
enum TPlanNodeType {
OLAP_SCAN_NODE,
MYSQL_SCAN_NODE,
CSV_SCAN_NODE, // deprecated
SCHEMA_SCAN_NODE,
HASH_JOIN_NODE,
MERGE_JOIN_NODE, // deprecated
AGGREGATION_NODE,
PRE_AGGREGATION_NODE,
SORT_NODE,
EXCHANGE_NODE,
MERGE_NODE,
SELECT_NODE,
CROSS_JOIN_NODE,
META_SCAN_NODE,
ANALYTIC_EVAL_NODE,
OLAP_REWRITE_NODE, // deprecated
KUDU_SCAN_NODE, // Deprecated
BROKER_SCAN_NODE,
EMPTY_SET_NODE,
UNION_NODE,
ES_SCAN_NODE,
ES_HTTP_SCAN_NODE,
REPEAT_NODE,
ASSERT_NUM_ROWS_NODE,
INTERSECT_NODE,
EXCEPT_NODE,
ODBC_SCAN_NODE,
TABLE_FUNCTION_NODE,
DATA_GEN_SCAN_NODE,
FILE_SCAN_NODE,
JDBC_SCAN_NODE,
}
// phases of an execution node
enum TExecNodePhase {
PREPARE,
OPEN,
GETNEXT,
CLOSE,
INVALID
}
// what to do when hitting a debug point (TPaloQueryOptions.DEBUG_ACTION)
enum TDebugAction {
WAIT,
FAIL
}
struct TKeyRange {
1: required i64 begin_key
2: required i64 end_key
3: required Types.TPrimitiveType column_type
4: required string column_name
}
// The information contained in subclasses of ScanNode captured in two separate
// Thrift structs:
// - TScanRange: the data range that's covered by the scan (which varies with the
// particular partition of the plan fragment of which the scan node is a part)
// - T<subclass>: all other operational parameters that are the same across
// all plan fragments
struct TPaloScanRange {
1: required list<Types.TNetworkAddress> hosts
2: required string schema_hash
3: required string version
4: required string version_hash // Deprecated
5: required Types.TTabletId tablet_id
6: required string db_name
7: optional list<TKeyRange> partition_column_ranges
8: optional string index_name
9: optional string table_name
}
enum TFileFormatType {
FORMAT_UNKNOWN = -1,
FORMAT_CSV_PLAIN = 0,
FORMAT_CSV_GZ,
FORMAT_CSV_LZO,
FORMAT_CSV_BZ2,
FORMAT_CSV_LZ4FRAME,
FORMAT_CSV_LZOP,
FORMAT_PARQUET,
FORMAT_CSV_DEFLATE,
FORMAT_ORC,
FORMAT_JSON,
FORMAT_PROTO,
}
// In previous versions, the data compression format and the file format were stored
// together in TFileFormatType, which made it inconvenient to combine file formats and
// compression formats flexibly. Therefore, the compression format is added separately here.
// For compatibility, if this type is set it takes precedence; otherwise the
// TFileFormatType prevails (see the illustrative example after the enum).
enum TFileCompressType {
UNKNOWN,
PLAIN,
GZ,
LZO,
BZ2,
LZ4FRAME,
DEFLATE
}
struct THdfsConf {
1: required string key
2: required string value
}
struct THdfsParams {
1: optional string fs_name
2: optional string user
3: optional string hdfs_kerberos_principal
4: optional string hdfs_kerberos_keytab
5: optional list<THdfsConf> hdfs_conf
}
// One broker range information.
struct TBrokerRangeDesc {
1: required Types.TFileType file_type
2: required TFileFormatType format_type
3: required bool splittable;
// Path of this range
4: required string path
// Offset of this file start
5: required i64 start_offset;
// Size of this range; if size = -1, read to the end of the file
6: required i64 size
// used to get stream for this load
7: optional Types.TUniqueId load_id
// total size of the file
8: optional i64 file_size
// number of columns from file
9: optional i32 num_of_columns_from_file
// columns parsed from file path should be after the columns read from file
10: optional list<string> columns_from_path
// useful when format_type == FORMAT_JSON
11: optional bool strip_outer_array;
12: optional string jsonpaths;
13: optional string json_root;
// useful when format_type == FORMAT_JSON
14: optional bool num_as_string;
15: optional bool fuzzy_parse;
16: optional THdfsParams hdfs_params
17: optional bool read_json_by_line;
// Whether to read lines by column definition; only for Hive
18: optional bool read_by_column_def;
// csv with header type
19: optional string header_type;
}
struct TBrokerScanRangeParams {
1: required i8 column_separator;
2: required i8 line_delimiter;
// Each line in the file is constructed into a tuple, and each field of the line
// corresponds to a slot in this tuple (see the illustrative sketch after this struct).
// src_tuple_id is the tuple id of the input file
3: required Types.TTupleId src_tuple_id
// src_slot_ids is the slot_ids of the input file
// these ids are used to find the slot descriptors
4: required list<Types.TSlotId> src_slot_ids
// dest_tuple_id is the tuple id needed by the scan node
5: required Types.TTupleId dest_tuple_id
// Exprs that convert the content read from the file into
// the format needed by the compute layer.
6: optional map<Types.TSlotId, Exprs.TExpr> expr_of_dest_slot
// properties needed to access the broker.
7: optional map<string, string> properties;
// If partition_ids is set, data that is not in these partitions will be filtered out.
8: optional list<i64> partition_ids
// Mapping from dest slot id to src slot id in the load exprs.
// It excludes slot ids that have a transform expr
9: optional map<Types.TSlotId, Types.TSlotId> dest_sid_to_src_sid_without_trans
// If strict_mode is true, incorrect data (rows whose cast result is null) will not be loaded
10: optional bool strict_mode
// for multi-byte separators
11: optional i32 column_separator_length = 1;
12: optional i32 line_delimiter_length = 1;
13: optional string column_separator_str;
14: optional string line_delimiter_str;
// trim double quotes for csv
15: optional bool trim_double_quotes;
}
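// Illustrative sketch for TBrokerScanRangeParams above (column names are hypothetical):
// loading a table (k1, k2, v1) from a CSV file with SET (v1 = k1 + 1),
// src_tuple_id / src_slot_ids describe k1 and k2 as slots read from the file,
// dest_tuple_id describes the table's slots, expr_of_dest_slot maps v1's dest slot to
// the expr `k1 + 1` (and may hold cast exprs for other columns), and
// dest_sid_to_src_sid_without_trans maps the dest slots without a transform expr
// (k1, k2) back to their src slots.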
// Broker scan range
struct TBrokerScanRange {
1: required list<TBrokerRangeDesc> ranges
2: required TBrokerScanRangeParams params
3: required list<Types.TNetworkAddress> broker_addresses
}
// Es scan range
struct TEsScanRange {
1: required list<Types.TNetworkAddress> es_hosts // ES hosts used by the BE scan node to connect to ES
// index and type have to be set here rather than in the scan node,
// because the scan node may scan an ES alias that maps to one or more indices
2: required string index
3: optional string type
4: required i32 shard_id
}
struct TFileTextScanRangeParams {
1: optional string column_separator;
2: optional string line_delimiter;
}
struct TFileScanSlotInfo {
1: optional Types.TSlotId slot_id;
2: optional bool is_file_slot;
}
// Describes how to read a file
struct TFileAttributes {
1: optional TFileTextScanRangeParams text_params;
// useful when format_type == FORMAT_JSON
2: optional bool strip_outer_array;
3: optional string jsonpaths;
4: optional string json_root;
5: optional bool num_as_string;
6: optional bool fuzzy_parse;
7: optional bool read_json_by_line;
// Whether to read lines by column definition; only for Hive
8: optional bool read_by_column_def;
// csv with header type
9: optional string header_type;
// trim double quotes for csv
10: optional bool trim_double_quotes;
}
struct TIcebergDeleteFileDesc {
1: optional string path;
2: optional i64 position_lower_bound;
3: optional i64 position_upper_bound;
4: optional list<i32> field_ids;
}
struct TIcebergFileDesc {
1: optional i32 format_version;
// Iceberg file type, 0: data, 1: position delete, 2: equality delete.
2: optional i32 content;
// When opening a delete file, filter by the data file path using the 'file_path' property
3: optional list<TIcebergDeleteFileDesc> delete_files;
// Deprecated
4: optional Types.TTupleId delete_table_tuple_id;
// Deprecated
5: optional Exprs.TExpr file_select_conjunct;
}
struct TTableFormatFileDesc {
1: optional string table_format_type
2: optional TIcebergFileDesc iceberg_params
}
struct TFileScanRangeParams {
1: optional Types.TFileType file_type;
2: optional TFileFormatType format_type;
3: optional TFileCompressType compress_type;
// If this is for a load job, src points to the source table and dest points to the doris table.
// If this is for a query, only dest_tuple_id is set, covering both file slots and partition slots
// (see the illustrative sketch after this struct).
4: optional Types.TTupleId src_tuple_id;
5: optional Types.TTupleId dest_tuple_id
// num_of_columns_from_file is used to split the slots into all_file_slot and all_partition_slot
6: optional i32 num_of_columns_from_file;
// All selected slots, which may be composed of file columns and partition values.
7: optional list<TFileScanSlotInfo> required_slots;
8: optional THdfsParams hdfs_params;
// properties for file such as s3 information
9: optional map<string, string> properties;
// The conversion expr map for a load job:
// dest slot id -> expr
10: optional map<Types.TSlotId, Exprs.TExpr> expr_of_dest_slot
11: optional map<Types.TSlotId, Exprs.TExpr> default_value_of_src_slot
// Mapping from dest slot id to src slot id in the load exprs.
// It excludes slot ids that have a transform expr
12: optional map<Types.TSlotId, Types.TSlotId> dest_sid_to_src_sid_without_trans
// If strict_mode is true, incorrect data (rows whose cast result is null) will not be loaded
13: optional bool strict_mode
14: optional list<Types.TNetworkAddress> broker_addresses
15: optional TFileAttributes file_attributes
16: optional Exprs.TExpr pre_filter_exprs
// Deprecated, For data lake table format
17: optional TTableFormatFileDesc table_format_params
// For CSV query tasks: the column indexes in the file, ordered by the dest tuple
18: optional list<i32> column_idxs
// Map of slot to its position in table schema. Only for Hive external table.
19: optional map<string, i32> slot_name_to_schema_pos
}
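// Illustrative sketch for TFileScanRangeParams above (table layout is hypothetical):
// querying a Hive table with three file columns and one partition column could set
// num_of_columns_from_file = 3 and required_slots to four entries, the first three with
// is_file_slot = true and the last with is_file_slot = false. For a query only
// dest_tuple_id is set; a load job would additionally fill src_tuple_id and
// expr_of_dest_slot to convert the source rows into the Doris table's format.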
struct TFileRangeDesc {
// If load_id is set, this is for stream/routine load.
// If path is set, this is for bulk load.
1: optional Types.TUniqueId load_id
// Path of this range
2: optional string path;
// Offset of this file start
3: optional i64 start_offset;
// Size of this range; if size = -1, read to the end of the file
4: optional i64 size;
5: optional i64 file_size;
// columns parsed from file path should be after the columns read from file
6: optional list<string> columns_from_path;
// column names from file path, in the same order with columns_from_path
7: optional list<string> columns_from_path_keys;
// For data lake table format
8: optional TTableFormatFileDesc table_format_params
}
// TFileScanRange represents a set of descriptions of a file and the rules for reading and converting it.
// TFileScanRangeParams: describe how to read and convert file
// list<TFileRangeDesc>: file location and range
struct TFileScanRange {
1: optional list<TFileRangeDesc> ranges
2: optional TFileScanRangeParams params
}
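// Illustrative sketch for TFileScanRange above: scanning two Parquet files on HDFS yields
// one params value (format_type = FORMAT_PARQUET, hdfs_params, required_slots) shared by
// two TFileRangeDesc entries, each carrying its own path, start_offset and size.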
// Scan range for external datasource, such as file on hdfs, es datanode, etc.
struct TExternalScanRange {
1: optional TFileScanRange file_scan_range
// TODO: add more scan range type?
}
enum TDataGenFunctionName {
NUMBERS = 0,
}
// Every table valued function should have a scan range definition to save its
// running parameters
struct TTVFNumbersScanRange {
1: optional i64 totalNumbers
}
struct TDataGenScanRange {
1: optional TTVFNumbersScanRange numbers_params
}
enum TIcebergMetadataType {
SNAPSHOTS = 0,
}
struct TIcebergMetadataParams {
1: optional TIcebergMetadataType metadata_type
}
struct TMetaScanRange {
1: optional TIcebergMetadataParams iceberg_params
}
// Specification of an individual data range which is held in its entirety
// by a storage server
struct TScanRange {
// one of these must be set for every TScanRange
4: optional TPaloScanRange palo_scan_range
5: optional binary kudu_scan_token // Deprecated
6: optional TBrokerScanRange broker_scan_range
7: optional TEsScanRange es_scan_range
8: optional TExternalScanRange ext_scan_range
9: optional TDataGenScanRange data_gen_scan_range
10: optional TMetaScanRange meta_scan_range
}
struct TMySQLScanNode {
1: required Types.TTupleId tuple_id
2: required string table_name
3: required list<string> columns
4: required list<string> filters
}
struct TOdbcScanNode {
1: optional Types.TTupleId tuple_id
2: optional string table_name
//Deprecated
3: optional string driver
4: optional Types.TOdbcTableType type
5: optional list<string> columns
6: optional list<string> filters
//Use now
7: optional string connect_string
8: optional string query_string
}
struct TJdbcScanNode {
1: optional Types.TTupleId tuple_id
2: optional string table_name
3: optional string query_string
4: optional Types.TOdbcTableType table_type
}
struct TBrokerScanNode {
1: required Types.TTupleId tuple_id
// Partition info used to process partition select in broker load
2: optional list<Exprs.TExpr> partition_exprs
3: optional list<Partitions.TRangePartition> partition_infos
4: optional list<Exprs.TExpr> pre_filter_exprs
}
struct TFileScanNode {
1: optional Types.TTupleId tuple_id
}
struct TEsScanNode {
1: required Types.TTupleId tuple_id
2: optional map<string,string> properties
// Used to indicate which fields can be fetched from ES docvalues.
// Elasticsearch's "fields" feature lets a field have two or more types; the first
// type may not have docvalues while a sub-field does (e.g. a text field has no
// docvalues, but its keyword sub-field does):
// "properties": {
//   "city": {
//     "type": "text",
//     "fields": {
//       "raw": {
//         "type": "keyword"
//       }
//     }
//   }
// }
// The docvalue context then provides the mapping from the selected field to the field
// actually requested, e.g. {"city": "city.raw"}:
// for `select city from table`, if docvalues are enabled, the value of `city` is fetched from `city.raw`.
3: optional map<string, string> docvalue_context
// Used to indicate which sub-field (e.g. xxx.keyword) should be used for predicates
// on string-type fields:
// "k1": {
//   "type": "text",
//   "fields": {
//     "keyword": {
//       "type": "keyword",
//       "ignore_above": 256
//     }
//   }
// }
// k1 > 'abc' -> k1.keyword > 'abc'
4: optional map<string, string> fields_context
}
struct TMiniLoadEtlFunction {
1: required string function_name
2: required i32 param_column_index
}
struct TCsvScanNode {
1: required Types.TTupleId tuple_id
2: required list<string> file_paths
3: optional string column_separator
4: optional string line_delimiter
// <column_name, ColumnType>
5: optional map<string, Types.TColumnType> column_type_mapping
// columns specified in load command
6: optional list<string> columns
// <column_name, default_value_in_string>
7: optional list<string> unspecified_columns
// always string type, and only contain columns which are not specified
8: optional list<string> default_values
9: optional double max_filter_ratio
10: optional map<string, TMiniLoadEtlFunction> column_function_mapping
}
struct TSchemaTableStructure {
1: optional string column_name
2: optional Types.TPrimitiveType type
3: optional i64 len
4: optional bool is_null;
}
struct TSchemaScanNode {
1: required Types.TTupleId tuple_id
2: required string table_name
3: optional string db
4: optional string table
5: optional string wild
6: optional string user // deprecated
7: optional string ip // frontend ip
8: optional i32 port // frontend thrift server port
9: optional i64 thread_id
10: optional string user_ip // deprecated
11: optional Types.TUserIdentity current_user_ident // to replace the user and user_ip
12: optional bool show_hidden_cloumns = false
13: optional list<TSchemaTableStructure> table_structure
14: optional string catalog
}
struct TMetaScanNode {
1: required Types.TTupleId tuple_id
2: optional string catalog
3: optional string database
4: optional string table
}
struct TSortInfo {
1: required list<Exprs.TExpr> ordering_exprs
2: required list<bool> is_asc_order
// Indicates, for each expr, if nulls should be listed first or last. This is
// independent of is_asc_order.
3: required list<bool> nulls_first
// Expressions evaluated over the input row that materialize the tuple to be sorted.
// Contains one expr per slot in the materialized tuple.
4: optional list<Exprs.TExpr> sort_tuple_slot_exprs
// Indicates the nullable info of sort_tuple_slot_exprs is changed after substitute by child's smap
5: optional list<bool> slot_exprs_nullability_changed_flags
}
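// Illustrative example for TSortInfo above: ORDER BY a ASC NULLS LAST, b DESC NULLS FIRST
// corresponds to ordering_exprs = [a, b], is_asc_order = [true, false],
// nulls_first = [false, true].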
enum TPushAggOp {
NONE = 0,
MINMAX = 1,
COUNT = 2,
MIX = 3
}
struct TOlapScanNode {
1: required Types.TTupleId tuple_id
2: required list<string> key_column_name
3: required list<Types.TPrimitiveType> key_column_type
4: required bool is_preaggregation
5: optional string sort_column
6: optional Types.TKeysType keyType
7: optional string table_name
8: optional list<Descriptors.TColumn> columns_desc
9: optional TSortInfo sort_info
// When the scan matches sort_info, the limit can be pushed down into the OlapScanNode.
// This limit applies to the scanner rather than the scan node, so a separate field is added.
10: optional i64 sort_limit
11: optional bool enable_unique_key_merge_on_write
12: optional TPushAggOp push_down_agg_type_opt
}
struct TEqJoinCondition {
// left-hand side of "<a> = <b>"
1: required Exprs.TExpr left;
// right-hand side of "<a> = <b>"
2: required Exprs.TExpr right;
// operator of equal join
3: optional Opcodes.TExprOpcode opcode;
}
enum TJoinOp {
INNER_JOIN,
LEFT_OUTER_JOIN,
LEFT_SEMI_JOIN,
RIGHT_OUTER_JOIN,
FULL_OUTER_JOIN,
CROSS_JOIN,
MERGE_JOIN, // deprecated
RIGHT_SEMI_JOIN,
LEFT_ANTI_JOIN,
RIGHT_ANTI_JOIN,
// Similar to LEFT_ANTI_JOIN with special handling for NULLs for the join conjuncts
// on the build side. Those NULLs are considered candidate matches, and therefore could
// be rejected (ANTI-join), based on the other join conjuncts. This is in contrast
// to LEFT_ANTI_JOIN where NULLs are not matches and therefore always returned.
NULL_AWARE_LEFT_ANTI_JOIN
}
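// Illustrative example for NULL_AWARE_LEFT_ANTI_JOIN above: a NOT IN subquery such as
//   SELECT * FROM t1 WHERE t1.k NOT IN (SELECT k FROM t2)
// is typically planned with this join op, because a NULL in t2.k must be treated as a
// possible match and can reject rows of t1, whereas a plain LEFT_ANTI_JOIN never
// matches on NULL and would keep such rows.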
struct THashJoinNode {
1: required TJoinOp join_op
// anything from the ON, USING or WHERE clauses that's an equi-join predicate
2: required list<TEqJoinCondition> eq_join_conjuncts
// anything from the ON or USING clauses (but *not* the WHERE clause) that's not an
// equi-join predicate
3: optional list<Exprs.TExpr> other_join_conjuncts
// If true, this join node can (but may choose not to) generate slot filters
// after constructing the build side that can be applied to the probe side.
4: optional bool add_probe_filters
// anything from the ON or USING clauses (but *not* the WHERE clause) that's not an
// equi-join predicate, only use in vec exec engine
5: optional Exprs.TExpr vother_join_conjunct
// hash output column
6: optional list<Types.TSlotId> hash_output_slot_ids
// TODO: remove 7 and 8 in the version after projection on ExecNode is included
7: optional list<Exprs.TExpr> srcExprList
8: optional Types.TTupleId voutput_tuple_id
9: optional list<Types.TTupleId> vintermediate_tuple_id_list
10: optional bool is_broadcast_join
11: optional bool is_mark
}
struct TNestedLoopJoinNode {
1: required TJoinOp join_op
// TODO: remove 2 and 3 in the version after projection on ExecNode is included
2: optional list<Exprs.TExpr> srcExprList
3: optional Types.TTupleId voutput_tuple_id
4: optional list<Types.TTupleId> vintermediate_tuple_id_list
// For bitmap filters: no join is needed; only the left child tuple is output
5: optional bool is_output_left_side_only
6: optional Exprs.TExpr vjoin_conjunct
7: optional bool is_mark
}
struct TMergeJoinNode {
// anything from the ON, USING or WHERE clauses that's an equi-join predicate
1: required list<TEqJoinCondition> cmp_conjuncts
// anything from the ON or USING clauses (but *not* the WHERE clause) that's not an
// equi-join predicate
2: optional list<Exprs.TExpr> other_join_conjuncts
}
enum TAggregationOp {
INVALID,
COUNT,
MAX,
DISTINCT_PC,
DISTINCT_PCSA,
MIN,
SUM,
GROUP_CONCAT,
HLL,
COUNT_DISTINCT,
SUM_DISTINCT,
LEAD,
FIRST_VALUE,
LAST_VALUE,
RANK,
DENSE_RANK,
ROW_NUMBER,
LAG,
HLL_C,
BITMAP_UNION,
NTILE,
}
//struct TAggregateFunctionCall {
// The aggregate function to call.
// 1: required Types.TFunction fn
// The input exprs to this aggregate function
// 2: required list<Exprs.TExpr> input_exprs
// If set, this aggregate function udf has varargs and this is the index for the
// first variable argument.
// 3: optional i32 vararg_start_idx
//}
struct TAggregationNode {
1: optional list<Exprs.TExpr> grouping_exprs
// aggregate exprs. The root of each expr is the aggregate function. The
// other exprs are the inputs to the aggregate function.
2: required list<Exprs.TExpr> aggregate_functions
// Tuple id used for intermediate aggregations (with slots of agg intermediate types)
3: required Types.TTupleId intermediate_tuple_id
// Tuple id used for the aggregation output (with slots of agg output types)
// Equal to intermediate_tuple_id if intermediate type == output type for all
// aggregate functions.
4: required Types.TTupleId output_tuple_id
// Set to true if this aggregation function requires finalization to complete after all
// rows have been aggregated, and this node is not an intermediate node.
5: required bool need_finalize
6: optional bool use_streaming_preaggregation
7: optional list<TSortInfo> agg_sort_infos
8: optional bool is_first_phase
9: optional bool use_fixed_length_serialization_opt
}
struct TRepeatNode {
// Tuple id used for output; it has new slots.
1: required Types.TTupleId output_tuple_id
// Slot id sets used to indicate which slots need to be set to null.
2: required list<set<Types.TSlotId>> slot_id_set_list
// An integer bitmap list; each bitmap indicates, by bit position, which exprs are not null
// (see the illustrative example after this struct).
3: required list<i64> repeat_id_list
// A list of integer lists; it indicates the positions of the grouping virtual slots.
4: required list<list<i64>> grouping_list
// A set of all slot ids
5: required set<Types.TSlotId> all_slot_ids
6: required list<Exprs.TExpr> exprs
}
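// Illustrative example for TRepeatNode above (assumed encoding): for
// GROUPING SETS ((k1, k2), (k1), ()), each input row is repeated three times;
// repeat_id_list holds one bitmap per repetition with a bit set for each expr that stays
// non-null (e.g. 0b11, 0b01, 0b00), and slot_id_set_list lists, per repetition, the slots
// that must be set to null (none, {k2's slot}, {k1's and k2's slots}).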
struct TPreAggregationNode {
1: required list<Exprs.TExpr> group_exprs
2: required list<Exprs.TExpr> aggregate_exprs
}
struct TSortNode {
1: required TSortInfo sort_info
// Indicates whether the backend service should use topn vs. sorting
2: required bool use_top_n;
// This is the number of rows to skip before returning results
3: optional i64 offset
// Indicates whether the imposed limit comes from DEFAULT_ORDER_BY_LIMIT.
6: optional bool is_default_limit
}
enum TAnalyticWindowType {
// Specifies the window as a logical offset
RANGE,
// Specifies the window in physical units
ROWS
}
enum TAnalyticWindowBoundaryType {
// The window starts/ends at the current row.
CURRENT_ROW,
// The window starts/ends at an offset preceding current row.
PRECEDING,
// The window starts/ends at an offset following current row.
FOLLOWING
}
struct TAnalyticWindowBoundary {
1: required TAnalyticWindowBoundaryType type
// Predicate that checks: child tuple '<=' buffered tuple + offset for the orderby expr
2: optional Exprs.TExpr range_offset_predicate
// Offset from the current row for ROWS windows.
3: optional i64 rows_offset_value
}
struct TAnalyticWindow {
// Specifies the window type for the start and end bounds.
1: required TAnalyticWindowType type
// Absence indicates window start is UNBOUNDED PRECEDING.
2: optional TAnalyticWindowBoundary window_start
// Absence indicates window end is UNBOUNDED FOLLOWING.
3: optional TAnalyticWindowBoundary window_end
}
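// Illustrative example for TAnalyticWindow above:
// ROWS BETWEEN 2 PRECEDING AND CURRENT ROW maps to type = ROWS,
// window_start = { type = PRECEDING, rows_offset_value = 2 } and
// window_end = { type = CURRENT_ROW };
// RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW maps to type = RANGE with
// window_start absent and window_end = { type = CURRENT_ROW }.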
// Defines a group of one or more analytic functions that share the same window,
// partitioning expressions and order-by expressions and are evaluated by a single
// ExecNode.
struct TAnalyticNode {
// Exprs on which the analytic function input is partitioned. Input is already sorted
// on partitions and order by clauses, partition_exprs is used to identify partition
// boundaries. Empty if no partition clause is specified.
1: required list<Exprs.TExpr> partition_exprs
// Exprs specified by an order-by clause for RANGE windows. Used to evaluate RANGE
// window boundaries. Empty if no order-by clause is specified or for windows
// specifying ROWS.
2: required list<Exprs.TExpr> order_by_exprs
// Functions evaluated over the window for each input row. The root of each expr is
// the aggregate function. Child exprs are the inputs to the function.
3: required list<Exprs.TExpr> analytic_functions
// Window specification
4: optional TAnalyticWindow window
// Tuple used for intermediate results of analytic function evaluations
// (with slots of analytic intermediate types)
5: required Types.TTupleId intermediate_tuple_id
// Tuple id used for the analytic function output (with slots of analytic output types)
// Equal to intermediate_tuple_id if intermediate type == output type for all
// analytic functions.
6: required Types.TTupleId output_tuple_id
// id of the buffered tuple (identical to the input tuple, which is assumed
// to come from a single SortNode); not set if both partition_exprs and
// order_by_exprs are empty
7: optional Types.TTupleId buffered_tuple_id
// predicate that checks: child tuple is in the same partition as the buffered tuple,
// i.e. each partition expr is equal or both are not null. Only set if
// buffered_tuple_id is set; should be evaluated over a row that is composed of the
// child tuple and the buffered tuple
8: optional Exprs.TExpr partition_by_eq
// predicate that checks: the order_by_exprs are equal or both NULL when evaluated
// over the child tuple and the buffered tuple. only set if buffered_tuple_id is set;
// should be evaluated over a row that is composed of the child tuple and the buffered
// tuple
9: optional Exprs.TExpr order_by_eq
}
struct TMergeNode {
// A MergeNode could be the left input of a join and needs to know which tuple to write.
1: required Types.TTupleId tuple_id
// List of expr lists materialized by this node.
// There is one list of exprs per query stmt feeding into this merge node.
2: required list<list<Exprs.TExpr>> result_expr_lists
// Separate list of expr lists coming from constant select stmts.
3: required list<list<Exprs.TExpr>> const_expr_lists
}
struct TUnionNode {
// A UnionNode materializes all const/result exprs into this tuple.
1: required Types.TTupleId tuple_id
// List of expr lists materialized by this node.
// There is one list of exprs per query stmt feeding into this union node.
2: required list<list<Exprs.TExpr>> result_expr_lists
// Separate list of expr lists coming from constant select stmts.
3: required list<list<Exprs.TExpr>> const_expr_lists
// Index of the first child that needs to be materialized.
4: required i64 first_materialized_child_idx
}
struct TIntersectNode {
// An IntersectNode materializes all const/result exprs into this tuple.
1: required Types.TTupleId tuple_id
// List of expr lists materialized by this node.
// There is one list of exprs per query stmt feeding into this intersect node.
2: required list<list<Exprs.TExpr>> result_expr_lists
// Separate list of expr lists coming from constant select stmts.
3: required list<list<Exprs.TExpr>> const_expr_lists
// Index of the first child that needs to be materialized.
4: required i64 first_materialized_child_idx
}
struct TExceptNode {
// An ExceptNode materializes all const/result exprs into this tuple.
1: required Types.TTupleId tuple_id
// List of expr lists materialized by this node.
// There is one list of exprs per query stmt feeding into this except node.
2: required list<list<Exprs.TExpr>> result_expr_lists
// Separate list of expr lists coming from constant select stmts.
3: required list<list<Exprs.TExpr>> const_expr_lists
// Index of the first child that needs to be materialized.
4: required i64 first_materialized_child_idx
}
struct TExchangeNode {
// The ExchangeNode's input rows form a prefix of the output rows it produces;
// this describes the composition of that prefix
1: required list<Types.TTupleId> input_row_tuples
// For a merging exchange, the sort information.
2: optional TSortInfo sort_info
// This is the number of rows to skip before returning results
3: optional i64 offset
}
struct TOlapRewriteNode {
1: required list<Exprs.TExpr> columns
2: required list<Types.TColumnType> column_types
3: required Types.TTupleId output_tuple_id
}
struct TTableFunctionNode {
1: optional list<Exprs.TExpr> fnCallExprList
2: optional list<Types.TSlotId> outputSlotIds
}
// This contains all of the information computed by the plan as part of the resource
// profile that is needed by the backend to execute.
struct TBackendResourceProfile {
// The minimum reservation for this plan node in bytes.
1: required i64 min_reservation = 0; // reservation not supported
// The maximum reservation for this plan node in bytes. MAX_INT64 means effectively
// unlimited.
2: required i64 max_reservation = 12188490189880; // no max reservation limit
// The spillable buffer size in bytes to use for this node, chosen by the planner.
// Set iff the node uses spillable buffers.
3: optional i64 spillable_buffer_size = 2097152
// The buffer size in bytes that is large enough to fit the largest row to be processed.
// Set if the node allocates buffers for rows from the buffer pool.
// Deprecated after string type support was added
4: optional i64 max_row_buffer_size = 4294967296 // 4G
}
enum TAssertion {
EQ, // val1 == val2
NE, // val1 != val2
LT, // val1 < val2
LE, // val1 <= val2
GT, // val1 > val2
GE // val1 >= val2
}
struct TAssertNumRowsNode {
1: optional i64 desired_num_rows;
2: optional string subquery_string;
3: optional TAssertion assertion;
}
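// Illustrative example for TAssertNumRowsNode above: a scalar subquery such as
//   SELECT * FROM t WHERE v = (SELECT max(v2) FROM t2)
// may be guarded by a node with desired_num_rows = 1 and an assertion (e.g. LE or EQ)
// that fails the query when the subquery produces more rows than allowed;
// subquery_string carries the subquery text, e.g. for error reporting.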
enum TRuntimeFilterType {
IN = 1
BLOOM = 2
MIN_MAX = 4
IN_OR_BLOOM = 8
BITMAP = 16
}
// Specification of a runtime filter.
struct TRuntimeFilterDesc {
// Filter unique id (within a query)
1: required i32 filter_id
// Expr on which the filter is built at a hash join.
2: required Exprs.TExpr src_expr
// The order of Expr in join predicate
3: required i32 expr_order
// Map of target node id to the target expr
4: required map<Types.TPlanNodeId, Exprs.TExpr> planId_to_target_expr
// Indicates if the source join node of this filter is a broadcast or
// a partitioned join.
5: required bool is_broadcast_join
// Indicates if there is at least one target scan node that is in the
// same fragment as the broadcast join that produced the runtime filter
6: required bool has_local_targets
// Indicates if there is at least one target scan node that is not in the same
// fragment as the broadcast join that produced the runtime filter
7: required bool has_remote_targets
// The type of runtime filter to build.
8: required TRuntimeFilterType type
// The size of the filter based on the ndv estimate and the min/max limit specified in
// the query options. Should be greater than zero for bloom filters, zero otherwise.
9: optional i64 bloom_filter_size_bytes
// for bitmap filter target expr
10: optional Exprs.TExpr bitmap_target_expr
// for bitmap filter
11: optional bool bitmap_filter_not_in
}
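// Illustrative example for TRuntimeFilterDesc above: for
//   SELECT ... FROM fact JOIN dim ON fact.k = dim.k
// a filter may be built at the join with src_expr = dim.k (the build side);
// planId_to_target_expr maps the id of the fact table's scan node to the target expr
// fact.k, and has_local_targets / has_remote_targets record whether that scan runs in
// the same fragment as the join or in a different one.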
struct TDataGenScanNode {
1: optional Types.TTupleId tuple_id
2: optional TDataGenFunctionName func_name
}
// This is essentially a union of all messages corresponding to subclasses
// of PlanNode.
struct TPlanNode {
// node id, needed to reassemble tree structure
1: required Types.TPlanNodeId node_id
2: required TPlanNodeType node_type
3: required i32 num_children
4: required i64 limit
5: required list<Types.TTupleId> row_tuples
// nullable_tuples[i] is true if row_tuples[i] is nullable
6: required list<bool> nullable_tuples
7: optional list<Exprs.TExpr> conjuncts
// Produce data in compact format.
8: required bool compact_data
// one field per PlanNode subclass
11: optional THashJoinNode hash_join_node
12: optional TAggregationNode agg_node
13: optional TSortNode sort_node
14: optional TMergeNode merge_node
15: optional TExchangeNode exchange_node
17: optional TMySQLScanNode mysql_scan_node
18: optional TOlapScanNode olap_scan_node
19: optional TCsvScanNode csv_scan_node
20: optional TBrokerScanNode broker_scan_node
21: optional TPreAggregationNode pre_agg_node
22: optional TSchemaScanNode schema_scan_node
23: optional TMergeJoinNode merge_join_node
24: optional TMetaScanNode meta_scan_node
25: optional TAnalyticNode analytic_node
26: optional TOlapRewriteNode olap_rewrite_node
28: optional TUnionNode union_node
29: optional TBackendResourceProfile resource_profile
30: optional TEsScanNode es_scan_node
31: optional TRepeatNode repeat_node
32: optional TAssertNumRowsNode assert_num_rows_node
33: optional TIntersectNode intersect_node
34: optional TExceptNode except_node
35: optional TOdbcScanNode odbc_scan_node
// Runtime filters assigned to this plan node; present in HashJoinNode and ScanNode
36: optional list<TRuntimeFilterDesc> runtime_filters
// Use in vec exec engine
40: optional Exprs.TExpr vconjunct
41: optional TTableFunctionNode table_function_node
// output column
42: optional list<Types.TSlotId> output_slot_ids
43: optional TDataGenScanNode data_gen_scan_node
// file scan node
44: optional TFileScanNode file_scan_node
45: optional TJdbcScanNode jdbc_scan_node
46: optional TNestedLoopJoinNode nested_loop_join_node
101: optional list<Exprs.TExpr> projections
102: optional Types.TTupleId output_tuple_id
}
// A flattened representation of a tree of PlanNodes, obtained by depth-first
// traversal.
struct TPlan {
1: required list<TPlanNode> nodes
}