blob: 19dd86ba4dc28e6a567354967d65a7af86f4537c [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
namespace cpp impala
namespace java org.apache.impala.thrift
include "ExecStats.thrift"
include "Exprs.thrift"
include "Types.thrift"
include "Descriptors.thrift"
include "Partitions.thrift"
include "ResourceProfile.thrift"
enum TDataSinkType {
DATA_STREAM_SINK = 0
TABLE_SINK = 1
JOIN_BUILD_SINK = 2
PLAN_ROOT_SINK = 3
}
enum TSinkAction {
INSERT = 0
UPDATE = 1
UPSERT = 2
DELETE = 3
}
enum TTableSinkType {
HDFS = 0
HBASE = 1
KUDU = 2
}
// Sink which forwards data to a remote plan fragment,
// according to the given output partition specification
// (ie, the m:1 part of an m:n data stream)
struct TDataStreamSink {
// destination node id
1: required Types.TPlanNodeId dest_node_id
// Specification of how the output of a fragment is partitioned.
// If the partitioning type is UNPARTITIONED, the output is broadcast
// to each destination host.
2: required Partitions.TDataPartition output_partition
}
// Creates a new Hdfs files according to the evaluation of the partitionKeyExprs,
// and materializes all its input RowBatches as a Hdfs file.
struct THdfsTableSink {
1: required list<Exprs.TExpr> partition_key_exprs
2: required bool overwrite
// The 'skip.header.line.count' property of the target Hdfs table. We will insert this
// many empty lines at the beginning of new text files, which will be skipped by the
// scanners while reading from the files.
3: optional i32 skip_header_line_count
// This property indicates to the table sink whether the input is ordered by the
// partition keys, meaning partitions can be opened, written, and closed one by one.
4: required bool input_is_clustered
// Stores the indices into the list of non-clustering columns of the target table that
// are stored in the 'sort.columns' table property. This is used in the backend to
// populate the RowGroup::sorting_columns list in parquet files.
5: optional list<i32> sort_columns
// Stores the allocated ACID write id if the target table is transactional.
6: optional i64 write_id
// Sorting order. If not lexical, the backend should not populate the
// RowGroup::sorting_columns list in parquet files.
7: required Types.TSortingOrder sorting_order
}
// Structure to encapsulate specific options that are passed down to the KuduTableSink
struct TKuduTableSink {
// The position in this vector is equal to the position in the output expressions of the
// sink and holds the index of the corresponsding column in the Kudu schema,
// e.g. 'exprs[i]' references 'kudu_table.column(referenced_cols[i])'
1: optional list<i32> referenced_columns
// Defines if duplicate or not found keys should be ignored
2: optional bool ignore_not_found_or_duplicate
}
// Sink to create the build side of a JoinNode.
struct TJoinBuildSink {
1: required Types.TJoinTableId join_table_id
// only set for hash join build sinks
2: required list<Exprs.TExpr> build_exprs
}
struct TPlanRootSink {
1: required ResourceProfile.TBackendResourceProfile resource_profile
}
// Union type of all table sinks.
struct TTableSink {
1: required Types.TTableId target_table_id
2: required TTableSinkType type
3: required TSinkAction action
4: optional THdfsTableSink hdfs_table_sink
5: optional TKuduTableSink kudu_table_sink
}
struct TDataSink {
// Tagged union of the different types of data sink.
1: required TDataSinkType type
2: optional TDataStreamSink stream_sink
3: optional TTableSink table_sink
4: optional TJoinBuildSink join_build_sink
5: optional TPlanRootSink plan_root_sink
// A human-readable label for the sink.
6: optional string label
// Estimated execution stats generated by the planner.
7: optional ExecStats.TExecStats estimated_stats
// Exprs that produce values for slots of output tuple (one expr per slot).
// Only set by the DataSink implementations that require it.
8: optional list<Exprs.TExpr> output_exprs
}