// common/protobuf/control_service.proto - impala - Git at Google

 // Licensed to the Apache Software Foundation (ASF) under one
 // or more contributor license agreements.  See the NOTICE file
 // distributed with this work for additional information
 // regarding copyright ownership.  The ASF licenses this file
 // to you under the Apache License, Version 2.0 (the
 // "License"); you may not use this file except in compliance
 // with the License.  You may obtain a copy of the License at
 //
 //   http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing,
 // software distributed under the License is distributed on an
 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 // KIND, either express or implied.  See the License for the
 // specific language governing permissions and limitations
 // under the License.
 //

 syntax="proto2";

 package impala;

 import "common.proto";

 import "kudu/rpc/rpc_header.proto";

 // Parquet-specific DML statistics; carried in DmlStatsPB.parquet_stats.
 message ParquetDmlStatsPB {
   // For each column, the on-disk size in bytes.
   // NOTE(review): the map key is presumably the column name -- confirm against the
   // writer that populates this map.
   map<string, int64> per_column_size = 1;
 }

 // Kudu-specific DML statistics; carried in DmlStatsPB.kudu_stats.
 message KuduDmlStatsPB {
   // The number of reported per-row errors, i.e. this many rows were not modified.
   // Note that this aggregate is less useful than a breakdown of the number of errors by
   // error type, e.g. number of rows with duplicate key conflicts, number of rows
   // violating nullability constraints, etc., but it isn't yet possible to differentiate
   // all error types in the KuduTableSink.
   optional int64 num_row_errors = 1;
 }

 // ReportExecStatus

 // Per-partition DML statistics; embedded in DmlPartitionStatusPB.stats.
 // TODO: this should include the table stats that we update the metastore with.
 message DmlStatsPB {
   // NOTE(review): presumably the number of bytes written for the partition -- confirm
   // against the table writers that populate it.
   optional int64 bytes_written = 1;

   // Parquet-specific statistics (see ParquetDmlStatsPB).
   optional ParquetDmlStatsPB parquet_stats = 2;

   // Kudu-specific statistics (see KuduDmlStatsPB).
   optional KuduDmlStatsPB kudu_stats = 3;
 }

 // Per-partition statistics and metadata resulting from DML statements. Used as the
 // value type of DmlExecStatusPB.per_partition_status.
 message DmlPartitionStatusPB {
   // The id of the partition written to (may be -1 if the partition is created by this
   // query). See THdfsTable.partitions.
   optional int64 id = 1;

   // The number of rows modified in this partition.
   optional int64 num_modified_rows = 2;

   // Detailed statistics gathered by table writers for this partition.
   optional DmlStatsPB stats = 3;

   // Fully qualified URI to the base directory for this partition.
   optional string partition_base_dir = 4;

   // The latest observed Kudu timestamp reported by the local KuduSession.
   // The value is logically an unsigned 64-bit integer, but it is carried in a signed
   // int64 field. NOTE(review): readers presumably reinterpret it as unsigned -- confirm.
   optional int64 kudu_latest_observed_ts = 5;
 }

 // The results of a DML statement, sent to the coordinator as part of
 // ReportExecStatusRequestPB (via FragmentInstanceExecStatusPB.dml_exec_status).
 message DmlExecStatusPB {
   // A map from temporary absolute file path to final absolute destination. The
   // coordinator performs these updates after the query completes.
   map<string, string> files_to_move = 1;

   // Per-partition details, used in finalization and reporting.
   // The keys represent partitions to create, coded as k1=v1/k2=v2/k3=v3..., with the
   // root's key in an unpartitioned table being ROOT_PARTITION_KEY.
   // The target table name is recorded in the corresponding TQueryExecRequest.
   map<string, DmlPartitionStatusPB> per_partition_status = 2;
 }

 // Error message exchange format. Used as the value type of
 // StatefulStatusPB.error_log, whose map key is the TErrorCode this entry belongs to.
 message ErrorLogEntryPB {
   // Number of error messages reported for the error code this entry is keyed by.
   optional int32 count = 1;

   // Sample messages for that error code.
   repeated string messages = 2;
 }

 // Represents the states that a fragment instance goes through during its execution. The
 // current state gets sent back to the coordinator (see
 // FragmentInstanceExecStatusPB.current_state) and will be presented to users through
 // the debug webpages. The states are listed in the order in which they are entered.
 // Not all states are necessarily transitioned through when there are errors.
 enum FInstanceExecStatePB {
   WAITING_FOR_EXEC = 0;
   WAITING_FOR_PREPARE = 1;
   WAITING_FOR_CODEGEN = 2;
   WAITING_FOR_OPEN = 3;
   WAITING_FOR_FIRST_BATCH = 4;
   FIRST_BATCH_PRODUCED = 5;
   PRODUCING_DATA = 6;
   LAST_BATCH_SENT = 7;
   FINISHED = 8;
 }

 // Represents any part of the status report that isn't idempotent. If the executor thinks
 // the report failed, we'll retransmit these parts, and this allows us to keep them
 // associated with their original sequence number so that if the coordinator actually did
 // receive the original report it won't reapply them.
 // Carried in FragmentInstanceExecStatusPB.stateful_report.
 message StatefulStatusPB {
   // Sequence number prevents out-of-order or duplicated updates from being applied.
   // 'report_seq_no' will be <= the 'report_seq_no' in the FragmentInstanceExecStatusPB
   // that contains this StatefulStatusPB.
   optional int64 report_seq_no = 1;

   // Map of TErrorCode to ErrorLogEntryPB, holding new errors that have not yet been
   // reported to the coordinator by this fragment instance. Not idempotent.
   map<int32, ErrorLogEntryPB> error_log = 2;
 }

 // RPC error metadata that can be associated with an AuxErrorInfoPB object. Created if
 // an RPC to another node failed.
 // NOTE(review): both fields are proto2 'required', so a message missing either one is
 // treated as uninitialized; relaxing them later needs coordinated reader/writer changes.
 message RPCErrorInfoPB {
   // The address of the RPC's target node.
   required NetworkAddressPB dest_node = 1;

   // The posix error code of the failed RPC.
   required int32 posix_error_code = 2;
 }

 // Error metadata that can be associated with a failed fragment instance. Used to store
 // extra info about errors encountered during fragment execution. This information is
 // used by the Coordinator to blacklist potentially unhealthy nodes.
 // Carried in FragmentInstanceExecStatusPB.aux_error_info.
 message AuxErrorInfoPB {
   // Set if the fragment instance failed because an RPC to another node failed. Only set
   // if the RPC failed due to a network error.
   optional RPCErrorInfoPB rpc_error_info = 1;
 }

 // Execution status of a single fragment instance. Sent to the coordinator as an element
 // of ReportExecStatusRequestPB.instance_exec_status.
 message FragmentInstanceExecStatusPB {
   // Sequence number prevents out-of-order or duplicated updates from being applied.
   optional int64 report_seq_no = 1;

   // The ID of the fragment instance that this report describes.
   optional UniqueIdPB fragment_instance_id = 2;

   // If true, fragment finished executing.
   optional bool done = 3;

   // The current state of this fragment instance's execution.
   optional FInstanceExecStatePB current_state = 4;

   // Cumulative structural changes made by the table sink of this fragment
   // instance. This is sent only when 'done' above is true. Not idempotent.
   optional DmlExecStatusPB dml_exec_status = 5;

   // The non-idempotent parts of the report, and any prior reports that are not known to
   // have been received by the coordinator.
   repeated StatefulStatusPB stateful_report = 6;

   // Metadata associated with a failed fragment instance. Only set for failed fragment
   // instances.
   optional AuxErrorInfoPB aux_error_info = 7;
 }

 // Request message of ControlService.ReportExecStatus.
 message ReportExecStatusRequestPB {
   // The query id which this report is for.
   optional UniqueIdPB query_id = 1;

   // Same as TExecQueryFInstancesParams.coord_state_idx. See also
   // ExecQueryFInstancesRequestPB.coord_state_idx.
   optional int32 coord_state_idx = 2;

   // Per-fragment-instance status entries covered by this report.
   repeated FragmentInstanceExecStatusPB instance_exec_status = 3;

   // Sidecar index of the cumulative profiles of all fragment instances
   // in instance_exec_status.
   optional int32 thrift_profiles_sidecar_idx = 4;

   // Cumulative status for this backend.
   // See QueryState::overall_status for details.
   optional StatusPB overall_status = 5;

   // The fragment instance id of the first failed fragment instance. This corresponds to
   // the fragment which sets 'overall_status' above. Not set if 'overall_status' is a
   // general error (e.g. failure to start fragment instances).
   optional UniqueIdPB fragment_instance_id = 6;
 }

 // Response message of ControlService.ReportExecStatus.
 message ReportExecStatusResponsePB {
   // Status returned to the caller of ReportExecStatus.
   optional StatusPB status = 1;
 }

 // Request message of ControlService.CancelQueryFInstances.
 message CancelQueryFInstancesRequestPB {
   // The query id of the query being cancelled.
   optional UniqueIdPB query_id = 1;
 }

 // Response message of ControlService.CancelQueryFInstances.
 message CancelQueryFInstancesResponsePB {
   // Status returned to the caller of CancelQueryFInstances.
   optional StatusPB status = 1;
 }

 // Request message of ControlService.RemoteShutdown.
 message RemoteShutdownParamsPB {
   // Deadline for the shutdown. After this deadline expires (starting at the time when
   // this remote shutdown command is received), the Impala daemon exits immediately
   // regardless of whether queries are still executing.
   // NOTE(review): the '_s' suffix suggests the unit is seconds -- confirm.
   optional int64 deadline_s = 1;
 }

 // The current status of a shutdown operation. Returned to the caller in
 // RemoteShutdownResultPB.shutdown_status.
 message ShutdownStatusPB {
   // Milliseconds remaining in startup grace period. 0 if the period has expired.
   optional int64 grace_remaining_ms = 1;

   // Milliseconds remaining in shutdown deadline. 0 if the deadline has expired.
   optional int64 deadline_remaining_ms = 2;

   // Number of fragment instances still executing.
   optional int64 finstances_executing = 3;

   // Number of client requests still registered with the Impala server that is being shut
   // down.
   optional int64 client_requests_registered = 4;

   // Number of queries still executing on backend.
   optional int64 backend_queries_executing = 5;
 }

 // Response message of ControlService.RemoteShutdown.
 message RemoteShutdownResultPB {
   // Success or failure of the operation.
   optional StatusPB status = 1;

   // If status is OK, additional info about the shutdown status.
   optional ShutdownStatusPB shutdown_status = 2;
 }

 // ExecQueryFInstances

 // Request message of ControlService.ExecQueryFInstances.
 // NOTE(review): several comments below mention 'fragment_instance_ctxs', which is not a
 // field of this message; it presumably lives in the TExecPlanFragmentInfo sidecar --
 // confirm.
 message ExecQueryFInstancesRequestPB {
   // This backend's index into Coordinator::backend_states_, needed for subsequent rpcs to
   // the coordinator.
   optional int32 coord_state_idx = 1;

   // Sidecar index of the TQueryCtx.
   optional int32 query_ctx_sidecar_idx = 2;

   // Sidecar index of the TExecPlanFragmentInfo.
   optional int32 plan_fragment_info_sidecar_idx = 3;

   // The minimum query-wide memory reservation (in bytes) required for the backend
   // executing the instances in fragment_instance_ctxs. This is the peak minimum
   // reservation that may be required by the concurrently-executing operators at any
   // point in query execution. It may be less than the initial reservation total claims
   // (below) if execution of some operators never overlaps, which allows reuse of
   // reservations.
   optional int64 min_mem_reservation_bytes = 4;

   // Total of the initial buffer reservations that we expect to be claimed on this
   // backend for all fragment instances in fragment_instance_ctxs. I.e. the sum over all
   // operators in all fragment instances that execute on this backend. This is used for
   // an optimization in InitialReservation. Measured in bytes.
   optional int64 initial_mem_reservation_total_claims = 5;

   // The backend memory limit (in bytes) as set by the admission controller. Used by the
   // query mem tracker to enforce the memory limit.
   optional int64 per_backend_mem_limit = 6;
 }

 // Response message of ControlService.ExecQueryFInstances.
 message ExecQueryFInstancesResponsePB {
   // Success or failure of the operation.
   optional StatusPB status = 1;
 }

 // Service whose RPCs start and cancel a query's fragment instances, report execution
 // status to the coordinator, and initiate shutdown of a backend.
 service ControlService {
   // Override the default authorization method.
   // NOTE(review): the option is presumably declared by the imported
   // "kudu/rpc/rpc_header.proto" -- confirm.
   option (kudu.rpc.default_authz_method) = "Authorize";

   // Called by coord to start asynchronous execution of a query's fragment instances in
   // backend. Returns as soon as all incoming data streams have been set up.
   rpc ExecQueryFInstances(ExecQueryFInstancesRequestPB)
       returns (ExecQueryFInstancesResponsePB);

   // Update the coordinator with the query status of the backend.
   rpc ReportExecStatus(ReportExecStatusRequestPB) returns (ReportExecStatusResponsePB);

   // Called by coordinator to cancel execution of a single query's fragment instances,
   // which the coordinator initiated with a prior call to ExecQueryFInstances.
   // Cancellation is asynchronous (in the sense that this call may return before the
   // fragment instance has completely stopped executing).
   rpc CancelQueryFInstances(CancelQueryFInstancesRequestPB)
       returns (CancelQueryFInstancesResponsePB);

   // Called to initiate shutdown of this backend.
   rpc RemoteShutdown(RemoteShutdownParamsPB) returns (RemoteShutdownResultPB);
 }
	// Licensed to the Apache Software Foundation (ASF) under one
	// or more contributor license agreements. See the NOTICE file
	// distributed with this work for additional information
	// regarding copyright ownership. The ASF licenses this file
	// to you under the Apache License, Version 2.0 (the
	// "License"); you may not use this file except in compliance
	// with the License. You may obtain a copy of the License at
	//
	// http://www.apache.org/licenses/LICENSE-2.0
	//
	// Unless required by applicable law or agreed to in writing,
	// software distributed under the License is distributed on an
	// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
	// KIND, either express or implied. See the License for the
	// specific language governing permissions and limitations
	// under the License.
	//

	syntax="proto2";

	package impala;

	import "common.proto";

	import "kudu/rpc/rpc_header.proto";

	// Parquet-specific DML statistics; carried in DmlStatsPB.parquet_stats.
	message ParquetDmlStatsPB {
	// For each column, the on-disk size in bytes.
	// NOTE(review): the map key is presumably the column name -- confirm against the
	// writer that populates this map.
	map<string, int64> per_column_size = 1;
	}

	// Kudu-specific DML statistics; carried in DmlStatsPB.kudu_stats.
	message KuduDmlStatsPB {
	// The number of reported per-row errors, i.e. this many rows were not modified.
	// Note that this aggregate is less useful than a breakdown of the number of errors by
	// error type, e.g. number of rows with duplicate key conflicts, number of rows
	// violating nullability constraints, etc., but it isn't yet possible to differentiate
	// all error types in the KuduTableSink.
	optional int64 num_row_errors = 1;
	}

	// ReportExecStatus

	// Per-partition DML statistics; embedded in DmlPartitionStatusPB.stats.
	// TODO: this should include the table stats that we update the metastore with.
	message DmlStatsPB {
	// NOTE(review): presumably the number of bytes written for the partition -- confirm
	// against the table writers that populate it.
	optional int64 bytes_written = 1;

	// Parquet-specific statistics (see ParquetDmlStatsPB).
	optional ParquetDmlStatsPB parquet_stats = 2;

	// Kudu-specific statistics (see KuduDmlStatsPB).
	optional KuduDmlStatsPB kudu_stats = 3;
	}

	// Per-partition statistics and metadata resulting from DML statements. Used as the
	// value type of DmlExecStatusPB.per_partition_status.
	message DmlPartitionStatusPB {
	// The id of the partition written to (may be -1 if the partition is created by this
	// query). See THdfsTable.partitions.
	optional int64 id = 1;

	// The number of rows modified in this partition.
	optional int64 num_modified_rows = 2;

	// Detailed statistics gathered by table writers for this partition.
	optional DmlStatsPB stats = 3;

	// Fully qualified URI to the base directory for this partition.
	optional string partition_base_dir = 4;

	// The latest observed Kudu timestamp reported by the local KuduSession.
	// The value is logically an unsigned 64-bit integer, but it is carried in a signed
	// int64 field. NOTE(review): readers presumably reinterpret it as unsigned -- confirm.
	optional int64 kudu_latest_observed_ts = 5;
	}

	// The results of a DML statement, sent to the coordinator as part of
	// ReportExecStatusRequestPB (via FragmentInstanceExecStatusPB.dml_exec_status).
	message DmlExecStatusPB {
	// A map from temporary absolute file path to final absolute destination. The
	// coordinator performs these updates after the query completes.
	map<string, string> files_to_move = 1;

	// Per-partition details, used in finalization and reporting.
	// The keys represent partitions to create, coded as k1=v1/k2=v2/k3=v3..., with the
	// root's key in an unpartitioned table being ROOT_PARTITION_KEY.
	// The target table name is recorded in the corresponding TQueryExecRequest.
	map<string, DmlPartitionStatusPB> per_partition_status = 2;
	}

	// Error message exchange format. Used as the value type of
	// StatefulStatusPB.error_log, whose map key is the TErrorCode this entry belongs to.
	message ErrorLogEntryPB {
	// Number of error messages reported for the error code this entry is keyed by.
	optional int32 count = 1;

	// Sample messages for that error code.
	repeated string messages = 2;
	}

	// Represents the states that a fragment instance goes through during its execution. The
	// current state gets sent back to the coordinator (see
	// FragmentInstanceExecStatusPB.current_state) and will be presented to users through
	// the debug webpages. The states are listed in the order in which they are entered.
	// Not all states are necessarily transitioned through when there are errors.
	enum FInstanceExecStatePB {
	WAITING_FOR_EXEC = 0;
	WAITING_FOR_PREPARE = 1;
	WAITING_FOR_CODEGEN = 2;
	WAITING_FOR_OPEN = 3;
	WAITING_FOR_FIRST_BATCH = 4;
	FIRST_BATCH_PRODUCED = 5;
	PRODUCING_DATA = 6;
	LAST_BATCH_SENT = 7;
	FINISHED = 8;
	}

	// Represents any part of the status report that isn't idempotent. If the executor thinks
	// the report failed, we'll retransmit these parts, and this allows us to keep them
	// associated with their original sequence number so that if the coordinator actually did
	// receive the original report it won't reapply them.
	// Carried in FragmentInstanceExecStatusPB.stateful_report.
	message StatefulStatusPB {
	// Sequence number prevents out-of-order or duplicated updates from being applied.
	// 'report_seq_no' will be <= the 'report_seq_no' in the FragmentInstanceExecStatusPB
	// that contains this StatefulStatusPB.
	optional int64 report_seq_no = 1;

	// Map of TErrorCode to ErrorLogEntryPB, holding new errors that have not yet been
	// reported to the coordinator by this fragment instance. Not idempotent.
	map<int32, ErrorLogEntryPB> error_log = 2;
	}

	// RPC error metadata that can be associated with an AuxErrorInfoPB object. Created if
	// an RPC to another node failed.
	// NOTE(review): both fields are proto2 'required', so a message missing either one is
	// treated as uninitialized; relaxing them later needs coordinated reader/writer changes.
	message RPCErrorInfoPB {
	// The address of the RPC's target node.
	required NetworkAddressPB dest_node = 1;

	// The posix error code of the failed RPC.
	required int32 posix_error_code = 2;
	}

	// Error metadata that can be associated with a failed fragment instance. Used to store
	// extra info about errors encountered during fragment execution. This information is
	// used by the Coordinator to blacklist potentially unhealthy nodes.
	// Carried in FragmentInstanceExecStatusPB.aux_error_info.
	message AuxErrorInfoPB {
	// Set if the fragment instance failed because an RPC to another node failed. Only set
	// if the RPC failed due to a network error.
	optional RPCErrorInfoPB rpc_error_info = 1;
	}

	// Execution status of a single fragment instance. Sent to the coordinator as an element
	// of ReportExecStatusRequestPB.instance_exec_status.
	message FragmentInstanceExecStatusPB {
	// Sequence number prevents out-of-order or duplicated updates from being applied.
	optional int64 report_seq_no = 1;

	// The ID of the fragment instance that this report describes.
	optional UniqueIdPB fragment_instance_id = 2;

	// If true, fragment finished executing.
	optional bool done = 3;

	// The current state of this fragment instance's execution.
	optional FInstanceExecStatePB current_state = 4;

	// Cumulative structural changes made by the table sink of this fragment
	// instance. This is sent only when 'done' above is true. Not idempotent.
	optional DmlExecStatusPB dml_exec_status = 5;

	// The non-idempotent parts of the report, and any prior reports that are not known to
	// have been received by the coordinator.
	repeated StatefulStatusPB stateful_report = 6;

	// Metadata associated with a failed fragment instance. Only set for failed fragment
	// instances.
	optional AuxErrorInfoPB aux_error_info = 7;
	}

	// Request message of ControlService.ReportExecStatus.
	message ReportExecStatusRequestPB {
	// The query id which this report is for.
	optional UniqueIdPB query_id = 1;

	// Same as TExecQueryFInstancesParams.coord_state_idx. See also
	// ExecQueryFInstancesRequestPB.coord_state_idx.
	optional int32 coord_state_idx = 2;

	// Per-fragment-instance status entries covered by this report.
	repeated FragmentInstanceExecStatusPB instance_exec_status = 3;

	// Sidecar index of the cumulative profiles of all fragment instances
	// in instance_exec_status.
	optional int32 thrift_profiles_sidecar_idx = 4;

	// Cumulative status for this backend.
	// See QueryState::overall_status for details.
	optional StatusPB overall_status = 5;

	// The fragment instance id of the first failed fragment instance. This corresponds to
	// the fragment which sets 'overall_status' above. Not set if 'overall_status' is a
	// general error (e.g. failure to start fragment instances).
	optional UniqueIdPB fragment_instance_id = 6;
	}

	// Response message of ControlService.ReportExecStatus.
	message ReportExecStatusResponsePB {
	// Status returned to the caller of ReportExecStatus.
	optional StatusPB status = 1;
	}

	// Request message of ControlService.CancelQueryFInstances.
	message CancelQueryFInstancesRequestPB {
	// The query id of the query being cancelled.
	optional UniqueIdPB query_id = 1;
	}

	// Response message of ControlService.CancelQueryFInstances.
	message CancelQueryFInstancesResponsePB {
	// Status returned to the caller of CancelQueryFInstances.
	optional StatusPB status = 1;
	}

	// Request message of ControlService.RemoteShutdown.
	message RemoteShutdownParamsPB {
	// Deadline for the shutdown. After this deadline expires (starting at the time when
	// this remote shutdown command is received), the Impala daemon exits immediately
	// regardless of whether queries are still executing.
	// NOTE(review): the '_s' suffix suggests the unit is seconds -- confirm.
	optional int64 deadline_s = 1;
	}

	// The current status of a shutdown operation. Returned to the caller in
	// RemoteShutdownResultPB.shutdown_status.
	message ShutdownStatusPB {
	// Milliseconds remaining in startup grace period. 0 if the period has expired.
	optional int64 grace_remaining_ms = 1;

	// Milliseconds remaining in shutdown deadline. 0 if the deadline has expired.
	optional int64 deadline_remaining_ms = 2;

	// Number of fragment instances still executing.
	optional int64 finstances_executing = 3;

	// Number of client requests still registered with the Impala server that is being shut
	// down.
	optional int64 client_requests_registered = 4;

	// Number of queries still executing on backend.
	optional int64 backend_queries_executing = 5;
	}

	// Response message of ControlService.RemoteShutdown.
	message RemoteShutdownResultPB {
	// Success or failure of the operation.
	optional StatusPB status = 1;

	// If status is OK, additional info about the shutdown status.
	optional ShutdownStatusPB shutdown_status = 2;
	}

	// ExecQueryFInstances

	// Request message of ControlService.ExecQueryFInstances.
	// NOTE(review): several comments below mention 'fragment_instance_ctxs', which is not a
	// field of this message; it presumably lives in the TExecPlanFragmentInfo sidecar --
	// confirm.
	message ExecQueryFInstancesRequestPB {
	// This backend's index into Coordinator::backend_states_, needed for subsequent rpcs to
	// the coordinator.
	optional int32 coord_state_idx = 1;

	// Sidecar index of the TQueryCtx.
	optional int32 query_ctx_sidecar_idx = 2;

	// Sidecar index of the TExecPlanFragmentInfo.
	optional int32 plan_fragment_info_sidecar_idx = 3;

	// The minimum query-wide memory reservation (in bytes) required for the backend
	// executing the instances in fragment_instance_ctxs. This is the peak minimum
	// reservation that may be required by the concurrently-executing operators at any
	// point in query execution. It may be less than the initial reservation total claims
	// (below) if execution of some operators never overlaps, which allows reuse of
	// reservations.
	optional int64 min_mem_reservation_bytes = 4;

	// Total of the initial buffer reservations that we expect to be claimed on this
	// backend for all fragment instances in fragment_instance_ctxs. I.e. the sum over all
	// operators in all fragment instances that execute on this backend. This is used for
	// an optimization in InitialReservation. Measured in bytes.
	optional int64 initial_mem_reservation_total_claims = 5;

	// The backend memory limit (in bytes) as set by the admission controller. Used by the
	// query mem tracker to enforce the memory limit.
	optional int64 per_backend_mem_limit = 6;
	}

	// Response message of ControlService.ExecQueryFInstances.
	message ExecQueryFInstancesResponsePB {
	// Success or failure of the operation.
	optional StatusPB status = 1;
	}

	// Service whose RPCs start and cancel a query's fragment instances, report execution
	// status to the coordinator, and initiate shutdown of a backend.
	service ControlService {
	// Override the default authorization method.
	// NOTE(review): the option is presumably declared by the imported
	// "kudu/rpc/rpc_header.proto" -- confirm.
	option (kudu.rpc.default_authz_method) = "Authorize";

	// Called by coord to start asynchronous execution of a query's fragment instances in
	// backend. Returns as soon as all incoming data streams have been set up.
	rpc ExecQueryFInstances(ExecQueryFInstancesRequestPB)
	returns (ExecQueryFInstancesResponsePB);

	// Update the coordinator with the query status of the backend.
	rpc ReportExecStatus(ReportExecStatusRequestPB) returns (ReportExecStatusResponsePB);

	// Called by coordinator to cancel execution of a single query's fragment instances,
	// which the coordinator initiated with a prior call to ExecQueryFInstances.
	// Cancellation is asynchronous (in the sense that this call may return before the
	// fragment instance has completely stopped executing).
	rpc CancelQueryFInstances(CancelQueryFInstancesRequestPB)
	returns (CancelQueryFInstancesResponsePB);

	// Called to initiate shutdown of this backend.
	rpc RemoteShutdown(RemoteShutdownParamsPB) returns (RemoteShutdownResultPB);
	}