// blob: cfb620294cd83f7e1d3ef566cd06fcd917c585b6 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
// NOTE: File flink_fn_execution_pb2.py is generated from this file. Please re-generate it by calling
// gen_protos.py whenever this file is changed.
syntax = "proto3";
package org.apache.flink.fn_execution.v1;
option java_package = "org.apache.flink.fnexecution.v1";
option java_outer_classname = "FlinkFnApi";
// ------------------------------------------------------------------------
// Table API & SQL
// ------------------------------------------------------------------------
// A single input argument of a user-defined function.
// NOTE(review): the oneof member names below are camelCase rather than the
// proto convention lower_snake_case; renaming would change the generated code
// (flink_fn_execution_pb2.py and the Java classes), so they are kept as-is.
message Input {
// Exactly one of the following is set.
oneof input {
// The result of another user-defined function (function chaining).
UserDefinedFunction udf = 1;
// The offset of a column of the input row.
int32 inputOffset = 2;
// A constant value, in serialized form.
bytes inputConstant = 3;
}
}
// User-defined function definition. It supports function chaining, i.e. the
// execution result of one user-defined function may serve as the input of
// another user-defined function.
message UserDefinedFunction {
// The serialized representation of the user-defined function
bytes payload = 1;
// The input arguments of the user-defined function, each of which could be one of the following:
// 1. A column from the input row
// 2. The result of another user-defined function
// 3. The constant value of the column
repeated Input inputs = 2;
// The index of the over window used in pandas batch over window aggregation
// (see UserDefinedFunctions.windows).
int32 window_index = 3;
// Whether the UDF takes the whole row as input instead of each column of a row
// as a separate argument.
bool takes_row_as_input = 4;
// Whether it's a pandas UDF (vectorized, operating on pandas.Series/DataFrame)
bool is_pandas_udf = 5;
}
// A list of user-defined functions to be executed in a batch.
message UserDefinedFunctions {
// The user-defined functions to execute.
repeated UserDefinedFunction udfs = 1;
// Whether metric reporting is enabled.
bool metric_enabled = 2;
// The over windows referenced by UserDefinedFunction.window_index
// (only used in pandas batch over window aggregation).
repeated OverWindow windows = 3;
// Whether profiling is enabled.
bool profile_enabled = 4;
}
// Used to describe the info of over window in pandas batch over window aggregation
message OverWindow {
// The kind of over window: RANGE_* windows are defined over a value range,
// ROW_* windows over a number of rows.
enum WindowType {
RANGE_UNBOUNDED = 0;
RANGE_UNBOUNDED_PRECEDING = 1;
RANGE_UNBOUNDED_FOLLOWING = 2;
RANGE_SLIDING = 3;
ROW_UNBOUNDED = 4;
ROW_UNBOUNDED_PRECEDING = 5;
ROW_UNBOUNDED_FOLLOWING = 6;
ROW_SLIDING = 7;
}
WindowType window_type = 1;
// The lower boundary of the window.
// NOTE(review): presumably only meaningful when the corresponding side of the
// window is bounded (e.g. the *_SLIDING / *_UNBOUNDED_FOLLOWING types) —
// confirm against the operator implementation.
int64 lower_boundary = 2;
// The upper boundary of the window (see the note on lower_boundary).
int64 upper_boundary = 3;
}
// A user-defined aggregate function definition.
message UserDefinedAggregateFunction {
// Describes a data view used by the aggregate function.
message DataViewSpec {
// A list-like data view whose elements have the given type.
message ListView {
Schema.FieldType element_type = 1;
}
// A map-like data view with the given key and value types.
message MapView {
Schema.FieldType key_type = 1;
Schema.FieldType value_type = 2;
}
// The name of the data view.
string name = 1;
// The index of the field holding this data view — presumably within the
// accumulator of the aggregate function; confirm against the operator code.
int32 field_index = 2;
// The concrete kind of data view; exactly one is set.
oneof data_view {
ListView list_view = 3;
MapView map_view = 4;
}
}
// The serialized representation of the user-defined function
bytes payload = 1;
// The input arguments of the user-defined function, each of which could be one of the following:
// 1. A column from the input row
// 2. The result of another user-defined function
// 3. The constant value of the column
repeated Input inputs = 2;
// The data views used by this aggregate function.
repeated DataViewSpec specs = 3;
// The index of the argument used as the aggregate filter,
// presumably -1 when there is no filter — TODO confirm.
int32 filter_arg = 4;
// Whether it is a distinct aggregate (e.g. COUNT(DISTINCT x)).
bool distinct = 5;
// Whether the UDF takes the whole row as input instead of each column of a row
// as a separate argument.
bool takes_row_as_input = 6;
}
// Describes the group window used in a window aggregation.
message GroupWindow {
// The kind of group window.
enum WindowType {
TUMBLING_GROUP_WINDOW = 0;
SLIDING_GROUP_WINDOW = 1;
SESSION_GROUP_WINDOW = 2;
}
// Window properties that can be emitted alongside the aggregation result.
enum WindowProperty {
WINDOW_START = 0;
WINDOW_END = 1;
ROW_TIME_ATTRIBUTE = 2;
PROC_TIME_ATTRIBUTE = 3;
}
WindowType window_type = 1;
// Whether it is a time window (as opposed to a count window).
bool is_time_window = 2;
// The slide of a sliding window — presumably milliseconds for time windows,
// a row count otherwise; confirm against the planner.
int64 window_slide = 3;
// The size of a tumbling/sliding window (same unit caveat as window_slide).
int64 window_size = 4;
// The gap of a session window (same unit caveat as window_slide).
int64 window_gap = 5;
// Whether the time attribute is row time (event time) rather than processing time.
bool is_row_time = 6;
// The index of the time attribute field in the input row.
int32 time_field_index = 7;
// NOTE(review): allowedLateness and namedProperties are camelCase rather than
// the proto convention lower_snake_case; renaming would change the generated
// code, so they are kept as-is.
int64 allowedLateness = 8;
// The window properties to emit, in order.
repeated WindowProperty namedProperties = 9;
// The timezone used when computing window boundaries — presumably an IANA
// timezone id such as "Asia/Shanghai"; confirm against the caller.
string shift_timezone = 10;
}
// A list of the user-defined aggregate functions to be executed in a group aggregate operation.
message UserDefinedAggregateFunctions {
// The aggregate functions to execute.
repeated UserDefinedAggregateFunction udfs = 1;
// Whether metric reporting is enabled.
bool metric_enabled = 2;
// The array of the key indexes.
repeated int32 grouping = 3;
// Generate retract messages if true.
bool generate_update_before = 4;
// The schema of the grouping keys.
Schema.FieldType key_type = 5;
// The index of a count aggregate used to calculate the number of accumulated rows.
int32 index_of_count_star = 6;
// Cleanup the expired state if true.
bool state_cleaning_enabled = 7;
// The state cache size.
int32 state_cache_size = 8;
// The map state read cache size.
int32 map_state_read_cache_size = 9;
// The map state write cache size.
int32 map_state_write_cache_size = 10;
// True if the count(*) agg is inserted by the planner.
bool count_star_inserted = 11;
// The group window spec; only set for window aggregations.
GroupWindow group_window = 12;
// Whether profiling is enabled.
bool profile_enabled = 13;
}
// A representation of the data schema (Table API & SQL logical types).
message Schema {
// The logical type of a field.
// NOTE(review): the zero value ROW carries business meaning instead of being
// an UNSPECIFIED sentinel, and the values are not prefixed with the enum name;
// changing either would break the generated code, so they are kept as-is.
enum TypeName {
ROW = 0;
TINYINT = 1;
SMALLINT = 2;
INT = 3;
BIGINT = 4;
DECIMAL = 5;
FLOAT = 6;
DOUBLE = 7;
DATE = 8;
TIME = 9;
TIMESTAMP = 10;
BOOLEAN = 11;
BINARY = 12;
VARBINARY = 13;
CHAR = 14;
VARCHAR = 15;
BASIC_ARRAY = 16;
MAP = 17;
MULTISET = 18;
LOCAL_ZONED_TIMESTAMP = 19;
ZONED_TIMESTAMP = 20;
}
// Key/value types of a MAP field.
message MapInfo {
FieldType key_type = 1;
FieldType value_type = 2;
}
// Precision of a TIME field.
message TimeInfo {
int32 precision = 1;
}
// Precision of a TIMESTAMP field.
message TimestampInfo {
int32 precision = 1;
}
// Precision of a LOCAL_ZONED_TIMESTAMP field.
message LocalZonedTimestampInfo {
int32 precision = 1;
}
// Precision of a ZONED_TIMESTAMP field.
message ZonedTimestampInfo {
int32 precision = 1;
}
// Precision and scale of a DECIMAL field.
message DecimalInfo {
int32 precision = 1;
int32 scale = 2;
}
// Length of a BINARY field.
message BinaryInfo {
int32 length = 1;
}
// Maximum length of a VARBINARY field.
message VarBinaryInfo {
int32 length = 1;
}
// Length of a CHAR field.
message CharInfo {
int32 length = 1;
}
// Maximum length of a VARCHAR field.
message VarCharInfo {
int32 length = 1;
}
// The full type of a field: its logical type name, nullability and the
// type-specific parameters (at most one type_info member, matching type_name).
message FieldType {
TypeName type_name = 1;
bool nullable = 2;
oneof type_info {
// Element type for BASIC_ARRAY / MULTISET.
FieldType collection_element_type = 3;
MapInfo map_info = 4;
// Nested schema for ROW.
Schema row_schema = 5;
DecimalInfo decimal_info = 6;
TimeInfo time_info = 7;
TimestampInfo timestamp_info = 8;
LocalZonedTimestampInfo local_zoned_timestamp_info = 9;
ZonedTimestampInfo zoned_timestamp_info = 10;
BinaryInfo binary_info = 11;
VarBinaryInfo var_binary_info = 12;
CharInfo char_info = 13;
VarCharInfo var_char_info = 14;
}
}
// A single named field of the schema.
message Field {
string name = 1;
string description = 2;
FieldType type = 3;
}
// The fields of the row, in order.
repeated Field fields = 1;
}
// ------------------------------------------------------------------------
// DataStream API
// ------------------------------------------------------------------------
// A representation of the data type information in DataStream.
message TypeInfo {
// The kind of type.
// NOTE(review): value 23 is skipped (INSTANT = 22, LOCAL_DATE = 24) —
// presumably a removed value; it should ideally be declared `reserved`
// so it is never reused. Left untouched to avoid changing generated code.
enum TypeName {
ROW = 0;
STRING = 1;
BYTE = 2;
BOOLEAN = 3;
SHORT = 4;
INT = 5;
LONG = 6;
FLOAT = 7;
DOUBLE = 8;
CHAR = 9;
BIG_INT = 10;
BIG_DEC = 11;
SQL_DATE = 12;
SQL_TIME = 13;
SQL_TIMESTAMP = 14;
BASIC_ARRAY = 15;
PRIMITIVE_ARRAY = 16;
TUPLE = 17;
LIST = 18;
MAP = 19;
PICKLED_BYTES = 20;
OBJECT_ARRAY = 21;
INSTANT = 22;
LOCAL_DATE = 24;
LOCAL_TIME = 25;
LOCAL_DATETIME = 26;
}
// Key/value types of a MAP.
message MapTypeInfo {
TypeInfo key_type = 1;
TypeInfo value_type = 2;
}
// Field names and types of a ROW.
message RowTypeInfo {
message Field {
string field_name = 1;
TypeInfo field_type = 2;
}
repeated Field fields = 1;
}
// Positional field types of a TUPLE.
message TupleTypeInfo {
repeated TypeInfo field_types = 1;
}
TypeName type_name = 1;
// Type-specific parameters; at most one member is set, matching type_name.
oneof type_info {
// Element type for the array/list types.
TypeInfo collection_element_type = 2;
RowTypeInfo row_type_info = 3;
TupleTypeInfo tuple_type_info = 4;
MapTypeInfo map_type_info = 5;
}
}
// User defined DataStream function definition.
message UserDefinedDataStreamFunction {
// The kind of DataStream function to execute.
enum FunctionType {
PROCESS = 0;
CO_PROCESS = 1;
KEYED_PROCESS = 2;
KEYED_CO_PROCESS = 3;
WINDOW = 4;
REVISE_OUTPUT = 100;
}
// A single key/value entry of the job parameters.
message JobParameter {
string key = 1;
string value = 2;
}
// A snapshot of the runtime context made available to the Python function.
message RuntimeContext {
string task_name = 1;
string task_name_with_subtasks = 2;
int32 number_of_parallel_subtasks = 3;
int32 max_number_of_parallel_subtasks = 4;
// The zero-based index of this parallel subtask.
int32 index_of_this_subtask = 5;
int32 attempt_number = 6;
repeated JobParameter job_parameters = 7;
// Whether the job runs in batch execution mode.
bool in_batch_execution_mode = 8;
}
FunctionType function_type = 1;
RuntimeContext runtime_context = 2;
// The serialized representation of the user-defined function.
bytes payload = 3;
// Whether metric reporting is enabled.
bool metric_enabled = 4;
// The type info of the key; presumably only set for the KEYED_* function
// types — confirm against the operator implementation.
TypeInfo key_type_info = 5;
// Whether profiling is enabled.
bool profile_enabled = 6;
// The state cache size.
int32 state_cache_size = 7;
// The map state read cache size.
int32 map_state_read_cache_size = 8;
// The map state write cache size.
int32 map_state_write_cache_size = 9;
}
// A representation of a state descriptor (state name plus TTL configuration).
// NOTE(review): the enum values below use PascalCase instead of the proto
// convention UPPER_SNAKE_CASE (they mirror the Java StateTtlConfig enums);
// renaming them would break the generated code, so they are kept as-is.
message StateDescriptor {
message StateTTLConfig {
// This option value configures when to update last access timestamp which prolongs state TTL.
enum UpdateType {
// TTL is disabled. State does not expire.
Disabled = 0;
// Last access timestamp is initialised when state is created and updated on every write operation.
OnCreateAndWrite = 1;
// The same as OnCreateAndWrite but also updated on read.
OnReadAndWrite = 2;
}
// This option configures whether expired user value can be returned or not.
enum StateVisibility {
// Return expired user value if it is not cleaned up yet.
ReturnExpiredIfNotCleanedUp = 0;
// Never return expired user value.
NeverReturnExpired = 1;
}
// This option configures time scale to use for ttl.
enum TtlTimeCharacteristic {
// Processing time
ProcessingTime = 0;
}
// TTL cleanup strategies.
message CleanupStrategies {
// Fixed strategies ordinals in strategies config field.
enum Strategies {
FULL_STATE_SCAN_SNAPSHOT = 0;
INCREMENTAL_CLEANUP = 1;
ROCKSDB_COMPACTION_FILTER = 2;
}
// Marker for strategies that carry no configuration.
enum EmptyCleanupStrategy {
EMPTY_STRATEGY = 0;
}
// Configuration of the incremental cleanup strategy (cleanup triggered
// incrementally on state access / record processing).
message IncrementalCleanupStrategy {
// Max number of keys pulled from queue for clean up upon state touch for any key.
int32 cleanup_size = 1;
// Whether to run incremental cleanup per each processed record.
bool run_cleanup_for_every_record = 2;
}
// Configuration of cleanup strategy using custom compaction filter in RocksDB.
message RocksdbCompactFilterCleanupStrategy {
// Number of state entries to process by compaction filter before updating current timestamp.
int64 query_time_after_num_entries = 1;
}
// A single configured cleanup strategy: which strategy, plus its
// strategy-specific configuration (exactly one oneof member).
message MapStrategiesEntry {
Strategies strategy = 1;
oneof CleanupStrategy {
EmptyCleanupStrategy empty_strategy = 2;
IncrementalCleanupStrategy incremental_cleanup_strategy = 3;
RocksdbCompactFilterCleanupStrategy rocksdb_compact_filter_cleanup_strategy = 4;
}
}
// Whether cleanup should also run in the background.
bool is_cleanup_in_background = 1;
// The configured cleanup strategies.
repeated MapStrategiesEntry strategies = 2;
}
UpdateType update_type = 1;
StateVisibility state_visibility = 2;
TtlTimeCharacteristic ttl_time_characteristic = 3;
// The time-to-live value — presumably milliseconds (mirrors the Java
// StateTtlConfig Time value); confirm against the serializer.
int64 ttl = 4;
CleanupStrategies cleanup_strategies = 5;
}
// The name under which the state is registered.
string state_name = 1;
// The TTL configuration of the state.
StateTTLConfig state_ttl_config = 2;
}
// ------------------------------------------------------------------------
// Common of Table API and DataStream API
// ------------------------------------------------------------------------
// A representation of the coder used to (de)serialize data between the Java
// operator and the Python worker. Exactly one data_type member is set.
message CoderInfoDescriptor {
// for Table & SQL
// Rows are serialized field by field according to the schema.
message FlattenRowType {
Schema schema = 1;
}
// Rows are serialized as whole Row objects according to the schema.
message RowType {
Schema schema = 1;
}
// Data is serialized in Apache Arrow format according to the schema.
message ArrowType {
Schema schema = 1;
}
// only used in batch over window
// the data consists of [window data][arrow data]
message OverWindowArrowType {
Schema schema = 1;
}
// for DataStream
// Data is serialized according to the DataStream type info.
message RawType {
TypeInfo type_info = 1;
}
enum Mode {
// process one record at a time
SINGLE = 0;
// process multiple records at a time
MULTIPLE = 1;
}
oneof data_type {
// for Table & SQL
FlattenRowType flatten_row_type = 1;
RowType row_type = 2;
ArrowType arrow_type = 3;
OverWindowArrowType over_window_arrow_type = 4;
// for DataStream
RawType raw_type = 5;
}
Mode mode = 6;
// append an end message (0x00) after serializing the records belonging to the same batch.
bool separated_with_end_message = 7;
}