blob: 653b983ef2c7b615d98479811ecca1d9e931a7ae [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
* <p>
* http://www.apache.org/licenses/LICENSE-2.0
* <p>
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
// Schema for the arrow.flight.protocol package, written in proto3 syntax.
syntax = "proto3";

package arrow.flight.protocol;

// Provides google.protobuf.Timestamp, used by the expiration_time fields.
import "google/protobuf/timestamp.proto";

// Per-language locations of the generated code.
option java_package = "org.apache.arrow.flight.impl";
option go_package = "github.com/apache/arrow/go/arrow/flight/gen/flight";
option csharp_namespace = "Apache.Arrow.Flight.Protocol";
/*
* A flight service is an endpoint for retrieving or storing Arrow data. A
* flight service can expose one or more predefined endpoints that can be
* accessed using the Arrow Flight Protocol. Additionally, a flight service
* can expose a set of actions that are available.
*/
service FlightService {
  // Handshake between client and server. Depending on the server, the
  // handshake may be required to determine the token that should be used
  // for future operations. Both request and response are streams, which
  // allows multiple round-trips depending on the auth mechanism.
  rpc Handshake(stream HandshakeRequest) returns (stream HandshakeResponse) {}

  // List the available streams, optionally filtered by criteria. Most
  // flight services expose one or more streams that are readily available
  // for retrieval, and this API lists the streams available for
  // consumption. The caller may supply criteria that limit the subset of
  // streams listed. Each flight service defines for itself how criteria
  // are consumed.
  rpc ListFlights(Criteria) returns (stream FlightInfo) {}

  // For a given FlightDescriptor, get information about how the flight can
  // be consumed. This is useful when the consumer can already identify the
  // specific flight to consume. It also lets a consumer generate a flight
  // stream through a specified descriptor: for example, a descriptor might
  // include a SQL statement or a pickled Python operation that will be
  // executed. In those cases the descriptor will not previously have been
  // available in the list of streams provided by ListFlights, but it will
  // be available for consumption for the duration defined by the specific
  // flight service.
  rpc GetFlightInfo(FlightDescriptor) returns (FlightInfo) {}

  // For a given FlightDescriptor, start a query and get information to
  // poll its execution status. This is useful when the query may be
  // long-running. The first PollFlightInfo call should return as quickly
  // as possible. (GetFlightInfo doesn't return until the query is
  // complete.)
  //
  // A client can consume any available results before the query is
  // completed. See PollInfo.info for details.
  //
  // A client can poll the updated query status by calling PollFlightInfo()
  // with PollInfo.flight_descriptor. A server should not respond until the
  // result would be different from last time. That way, the client can
  // "long poll" for updates without constantly making requests. Clients
  // can set a short timeout to avoid blocking calls if desired.
  //
  // A client can't use PollInfo.flight_descriptor after
  // PollInfo.expiration_time passes. A server might not accept the retry
  // descriptor anymore and the query may be cancelled.
  //
  // A client may use the CancelFlightInfo action with PollInfo.info to
  // cancel the running query.
  rpc PollFlightInfo(FlightDescriptor) returns (PollInfo) {}

  // For a given FlightDescriptor, get the Schema as described in
  // Schema.fbs::Schema. This is used when a consumer needs the Schema of a
  // flight stream. Like GetFlightInfo, this interface may generate a new
  // flight that was not previously available in ListFlights.
  rpc GetSchema(FlightDescriptor) returns (SchemaResult) {}

  // Retrieve a single stream, belonging to a particular descriptor, that
  // is identified by the referenced ticket. A Flight can be composed of
  // one or more streams, where each stream can be retrieved using a
  // separate opaque ticket that the flight service uses for managing a
  // collection of streams.
  rpc DoGet(Ticket) returns (stream FlightData) {}

  // Push a stream to the flight service associated with a particular
  // flight stream. This allows a client of a flight service to upload a
  // stream of data. Depending on the particular flight service, a client
  // consumer could be allowed to upload a single stream per descriptor or
  // an unlimited number. In the latter case, the service might implement a
  // 'seal' action that can be applied to a descriptor once all streams are
  // uploaded.
  rpc DoPut(stream FlightData) returns (stream PutResult) {}

  // Open a bidirectional data channel for a given descriptor. This allows
  // clients to send and receive arbitrary Arrow data and
  // application-specific metadata in a single logical stream. In contrast
  // to DoGet/DoPut, this is more suited for clients offloading computation
  // (rather than storage) to a Flight service.
  rpc DoExchange(stream FlightData) returns (stream FlightData) {}

  // Flight services can support an arbitrary number of simple actions in
  // addition to the possible ListFlights, GetFlightInfo, DoGet, DoPut
  // operations that are potentially available. DoAction allows a flight
  // client to do a specific action against a flight service. An action
  // includes opaque request and response objects that are specific to the
  // type of action being undertaken.
  rpc DoAction(Action) returns (stream Result) {}

  // A flight service exposes all of the available action types that it
  // has, along with descriptions. This allows different flight consumers
  // to understand the capabilities of the flight service.
  rpc ListActions(Empty) returns (stream ActionType) {}
}
/*
* The request that a client provides to a server on handshake.
*/
message HandshakeRequest {
  // A defined protocol version.
  uint64 protocol_version = 1;

  // Arbitrary auth/handshake info.
  bytes payload = 2;
}
// The response that a server streams back to a client on handshake (the
// response type of FlightService.Handshake).
message HandshakeResponse {
  // A defined protocol version.
  uint64 protocol_version = 1;

  // Arbitrary auth/handshake info.
  bytes payload = 2;
}
/*
* A message for doing simple auth.
*/
message BasicAuth {
  // NOTE(review): numbering starts at 2 and field number 1 is not declared
  // in this message. If 1 was ever used by a since-removed field it should
  // be protected with `reserved 1;` - confirm against the file history.

  // User name for simple auth.
  string username = 2;

  // Password for simple auth.
  string password = 3;
}
// A message with no fields. Used as the request type of
// FlightService.ListActions.
message Empty {}
/*
* Describes an available action, including both the name used for execution
* and a short description of the purpose of the action.
*/
message ActionType {
  // The name of the action, as used for execution.
  string type = 1;

  // A short description of the purpose of the action.
  string description = 2;
}
/*
* A service specific expression that can be used to return a limited set
* of available Arrow Flight streams.
*/
message Criteria {
  // Service-specific expression used to limit the set of streams returned.
  // It is carried as opaque bytes; each service defines how to consume it.
  bytes expression = 1;
}
/*
* An opaque action specific to the service.
*/
message Action {
  // The type of action to perform.
  // NOTE(review): presumably matches an ActionType.type advertised by
  // ListActions - confirm.
  string type = 1;

  // Opaque, action-specific request payload. For the CancelFlightInfo and
  // RenewFlightEndpoint actions, the corresponding *Request message should
  // be stored here.
  bytes body = 2;
}
/*
* The request of the CancelFlightInfo action.
*
* The request should be stored in Action.body.
*/
message CancelFlightInfoRequest {
  // The FlightInfo of the query to cancel (for a polled query, this is
  // PollInfo.info).
  FlightInfo info = 1;
}
/*
* The request of the RenewFlightEndpoint action.
*
* The request should be stored in Action.body.
*/
message RenewFlightEndpointRequest {
  // The endpoint the RenewFlightEndpoint action applies to.
  // NOTE(review): presumably renewal extends FlightEndpoint.expiration_time;
  // this file does not spell out the semantics - confirm.
  FlightEndpoint endpoint = 1;
}
/*
* An opaque result returned after executing an action.
*/
message Result {
  // Opaque, action-specific result payload. For the CancelFlightInfo
  // action, a CancelFlightInfoResult should be stored here.
  bytes body = 1;
}
/*
* The result of a cancel operation.
*
* This is used by CancelFlightInfoResult.status.
*/
enum CancelStatus {
  // The cancellation status is unknown. Servers should avoid using this
  // value (send a NOT_FOUND error if the requested query is not known).
  // Clients can retry the request.
  CANCEL_STATUS_UNSPECIFIED = 0;

  // The cancellation request is complete. Subsequent requests with the
  // same payload may return CANCELLED or a NOT_FOUND error.
  CANCEL_STATUS_CANCELLED = 1;

  // The cancellation request is in progress. The client may retry the
  // cancellation request.
  CANCEL_STATUS_CANCELLING = 2;

  // The query is not cancellable. The client should not retry the
  // cancellation request.
  CANCEL_STATUS_NOT_CANCELLABLE = 3;
}
/*
* The result of the CancelFlightInfo action.
*
* The result should be stored in Result.body.
*/
message CancelFlightInfoResult {
  // Outcome of the cancel operation; see CancelStatus for the meaning of
  // each value.
  CancelStatus status = 1;
}
/*
* Wraps the result of a GetSchema call.
*/
message SchemaResult {
  // The schema of the dataset in its IPC form:
  //   4 bytes - an optional IPC_CONTINUATION_TOKEN prefix
  //   4 bytes - the byte length of the payload
  //   a flatbuffer Message whose header is the Schema
  bytes schema = 1;
}
/*
* The name or tag for a Flight. May be used as a way to retrieve or generate
* a flight or be used to expose a set of previously defined flights.
*/
message FlightDescriptor {
  // Describes what type of descriptor is defined.
  enum DescriptorType {
    // Protobuf pattern, not used.
    UNKNOWN = 0;

    // A named path that identifies a dataset. A path is composed of a
    // string or list of strings describing a particular dataset. This is
    // conceptually similar to a path inside a filesystem.
    PATH = 1;

    // An opaque command to generate a dataset.
    CMD = 2;
  }

  // The kind of descriptor; determines which of cmd or path should be
  // defined.
  DescriptorType type = 1;

  // Opaque value used to express a command. Should only be defined when
  // type = CMD.
  bytes cmd = 2;

  // List of strings identifying a particular dataset. Should only be
  // defined when type = PATH.
  repeated string path = 3;
}
/*
* The access coordinates for retrieval of a dataset. With a FlightInfo, a
* consumer is able to determine how to retrieve a dataset.
*/
message FlightInfo {
  // The schema of the dataset in its IPC form:
  //   4 bytes - an optional IPC_CONTINUATION_TOKEN prefix
  //   4 bytes - the byte length of the payload
  //   a flatbuffer Message whose header is the Schema
  bytes schema = 1;

  // The descriptor associated with this info.
  FlightDescriptor flight_descriptor = 2;

  // A list of endpoints associated with the flight. To consume the whole
  // flight, all endpoints (and hence all Tickets) must be consumed.
  // Endpoints can be consumed in any order.
  //
  // In other words, an application can use multiple endpoints to represent
  // partitioned data.
  //
  // If the returned data has an ordering, an application can use
  // "FlightInfo.ordered = true" or should return all the data in a single
  // endpoint. Otherwise, there is no ordering defined on endpoints or the
  // data within.
  //
  // A client can read ordered data by reading data from the returned
  // endpoints, in order, from front to back.
  //
  // Note that a client may ignore "FlightInfo.ordered = true". If an
  // ordering is important for an application, the application must choose
  // one of the following:
  //
  // * Require that all clients read data in the order of the returned
  //   endpoints.
  // * Return all the data in a single endpoint.
  repeated FlightEndpoint endpoint = 3;

  // Set these to -1 if unknown.
  // NOTE(review): presumably the total record count and total byte size of
  // the flight, going by the field names - confirm.
  int64 total_records = 4;
  int64 total_bytes = 5;

  // FlightEndpoints are in the same order as the data.
  bool ordered = 6;

  // Application-defined metadata.
  //
  // There is no inherent or required relationship between this and the
  // app_metadata fields in the FlightEndpoints or resulting FlightData
  // messages. Since this metadata is application-defined, a given
  // application could define there to be a relationship, but there is none
  // required by the spec.
  bytes app_metadata = 7;
}
/*
* The information to process a long-running query.
*/
message PollInfo {
  // The currently available results.
  //
  // If "flight_descriptor" is not specified, the query is complete and
  // "info" specifies all results. Otherwise, "info" contains partial query
  // results.
  //
  // Note that each PollInfo response contains a complete FlightInfo (not
  // just the delta between the previous and current FlightInfo).
  //
  // Subsequent PollInfo responses may only append new endpoints to info.
  //
  // Clients can begin fetching results via DoGet(Ticket) with the ticket
  // in the info before the query is completed. FlightInfo.ordered is also
  // valid.
  FlightInfo info = 1;

  // The descriptor the client should use on the next try. If unset, the
  // query is complete.
  FlightDescriptor flight_descriptor = 2;

  // Query progress. If known, must be in [0.0, 1.0] but need not be
  // monotonic or nondecreasing. If unknown, do not set.
  //
  // Declared `optional` so that "not set" is distinguishable from 0.0.
  optional double progress = 3;

  // Expiration time for this request. After this passes, the server might
  // not accept the retry descriptor anymore (and the query may be
  // cancelled). This may be updated on a call to PollFlightInfo.
  google.protobuf.Timestamp expiration_time = 4;
}
/*
* A particular stream or split associated with a flight.
*/
message FlightEndpoint {
  // Token used to retrieve this stream.
  Ticket ticket = 1;

  // A list of URIs where this ticket can be redeemed via DoGet().
  //
  // If the list is empty, the expectation is that the ticket can only be
  // redeemed on the current service where the ticket was generated.
  //
  // If the list is not empty, the expectation is that the ticket can be
  // redeemed at any of the locations, and that the data returned will be
  // equivalent. In this case, the ticket may only be redeemed at one of
  // the given locations, and not (necessarily) on the current service.
  //
  // In other words, an application can use multiple locations to represent
  // redundant and/or load balanced services.
  repeated Location location = 2;

  // Expiration time of this stream. If present, clients may assume they
  // can retry DoGet requests. Otherwise, it is application-defined whether
  // DoGet requests may be retried.
  google.protobuf.Timestamp expiration_time = 3;

  // Application-defined metadata.
  //
  // There is no inherent or required relationship between this and the
  // app_metadata fields in the FlightInfo or resulting FlightData
  // messages. Since this metadata is application-defined, a given
  // application could define there to be a relationship, but there is none
  // required by the spec.
  bytes app_metadata = 4;
}
/*
* A location where a Flight service will accept retrieval of a particular
* stream given a ticket.
*/
message Location {
  // URI at which a ticket can be redeemed via DoGet() (see
  // FlightEndpoint.location).
  string uri = 1;
}
/*
* An opaque identifier that the service can use to retrieve a particular
* portion of a stream.
*
* Tickets are meant to be single use. It is an error/application-defined
* behavior to reuse a ticket.
*/
message Ticket {
  // The opaque identifier itself, as raw bytes.
  bytes ticket = 1;
}
/*
* A batch of Arrow data as part of a stream of batches.
*/
message FlightData {
  // The descriptor of the data. This is only relevant when a client is
  // starting a new DoPut stream.
  FlightDescriptor flight_descriptor = 1;

  // Header for message data as described in Message.fbs::Message.
  bytes data_header = 2;

  // Application-defined metadata.
  bytes app_metadata = 3;

  // The actual batch of Arrow data, preferably handled with minimal
  // copies. It comes last in the definition to help with sidecar patterns
  // (it is expected that some implementations will fetch this field off
  // the wire with specialized code to avoid extra memory copies).
  bytes data_body = 1000;
}
/**
* The response message associated with the submission of a DoPut.
*/
message PutResult {
  // NOTE(review): presumably application-defined metadata, by analogy with
  // the other app_metadata fields in this file - confirm.
  bytes app_metadata = 1;
}