| // Licensed to the Apache Software Foundation (ASF) under one |
| // or more contributor license agreements. See the NOTICE file |
| // distributed with this work for additional information |
| // regarding copyright ownership. The ASF licenses this file |
| // to you under the Apache License, Version 2.0 (the |
| // "License"); you may not use this file except in compliance |
| // with the License. You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| |
| syntax = "proto2"; |
| |
| import "mesos/mesos.proto"; |
| |
| package mesos.scheduler; |
| |
| option cc_enable_arenas = true; |
| |
| option java_package = "org.apache.mesos.scheduler"; |
| option java_outer_classname = "Protos"; |
| |
| |
| /** |
| * Scheduler event API. |
| * |
| * An event is described using the standard protocol buffer "union" |
| * trick, see: |
| * https://developers.google.com/protocol-buffers/docs/techniques#union. |
| * |
| * The 'type' field indicates which of the optional payload fields |
| * declared at the end of this message ('subscribed', 'offers', |
| * 'inverse_offers', 'rescind', 'rescind_inverse_offer', 'update', |
| * 'update_operation_status', 'message', 'failure', 'error') should be |
| * present. The UNKNOWN and HEARTBEAT types have no payload field. |
| * |
| * NOTE: The numbers of the 'Type' enum values and the numbers of the |
| * payload fields are assigned independently and do not generally |
| * coincide (e.g., SUBSCRIBED = 1 while 'subscribed' is field 2), so a |
| * payload field must never be looked up by the numeric enum value. |
| */ |
| message Event { |
| // Possible event types, followed by message definitions if |
| // applicable. |
| enum Type { |
| // This must be the first enum value in this list, to |
| // ensure that if 'type' is not set, the default value |
| // is UNKNOWN. This enables enum values to be added |
| // in a backwards-compatible way. See: MESOS-4997. |
| UNKNOWN = 0; |
| |
| SUBSCRIBED = 1; // See 'Subscribed' below. |
| OFFERS = 2; // See 'Offers' below. |
| INVERSE_OFFERS = 9; // See 'InverseOffers' below. |
| RESCIND = 3; // See 'Rescind' below. |
| RESCIND_INVERSE_OFFER = 10; // See 'RescindInverseOffer' below. |
| UPDATE = 4; // See 'Update' below. |
| UPDATE_OPERATION_STATUS = 11; // See 'UpdateOperationStatus' below. |
| MESSAGE = 5; // See 'Message' below. |
| FAILURE = 6; // See 'Failure' below. |
| ERROR = 7; // See 'Error' below. |
| |
| // Periodic message sent by the Mesos master according to |
| // 'Subscribed.heartbeat_interval_seconds'. If the scheduler does |
| // not receive any events (including heartbeats) for an extended |
| // period of time (e.g., 5 x heartbeat_interval_seconds), there is |
| // likely a network partition. In such a case the scheduler should |
| // close the existing subscription connection and resubscribe |
| // using a backoff strategy. |
| HEARTBEAT = 8; |
| } |
| |
| // First event received when the scheduler subscribes. |
| message Subscribed { |
| required FrameworkID framework_id = 1; |
| |
| // This value will be set if the master is sending heartbeats. See |
| // the comment above on 'HEARTBEAT' for more details. |
| optional double heartbeat_interval_seconds = 2; |
| |
| // Since Mesos 1.1. |
| optional MasterInfo master_info = 3; |
| } |
| |
| // Received whenever there are new resources that are offered to the |
| // scheduler. Each offer corresponds to a set of resources on an |
| // agent. Until the scheduler accepts or declines an offer the |
| // resources are considered allocated to the scheduler. |
| message Offers { |
| repeated Offer offers = 1; |
| } |
| |
| // Received whenever there are resources requested back from the |
| // scheduler. Each inverse offer specifies the agent, and |
| // optionally specific resources. Accepting or Declining an inverse |
| // offer informs the allocator of the scheduler's ability to release |
| // the specified resources without violating an SLA. If no resources |
| // are specified then all resources on the agent are requested to be |
| // released. |
| message InverseOffers { |
| repeated InverseOffer inverse_offers = 1; |
| } |
| |
| // Received when a particular offer is no longer valid (e.g., the |
| // slave corresponding to the offer has been removed) and hence |
| // needs to be rescinded. Any future calls ('Accept' / 'Decline') made |
| // by the scheduler regarding this offer will be invalid. |
| message Rescind { |
| required OfferID offer_id = 1; |
| } |
| |
| // Received when a particular inverse offer is no longer valid |
| // (e.g., the agent corresponding to the offer has been removed) |
| // and hence needs to be rescinded. Any future calls ('Accept' / |
| // 'Decline') made by the scheduler regarding this inverse offer |
| // will be invalid. |
| message RescindInverseOffer { |
| required OfferID inverse_offer_id = 1; |
| } |
| |
| // Received whenever there is a status update that is generated by |
| // the executor or slave or master. Status updates should be used by |
| // executors to reliably communicate the status of the tasks that |
| // they manage. It is crucial that a terminal update (see TaskState |
| // in mesos.proto) is sent by the executor as soon as the task |
| // terminates, in order for Mesos to release the resources allocated |
| // to the task. It is also the responsibility of the scheduler to |
| // explicitly acknowledge the receipt of a status update. See |
| // 'Acknowledge' in the 'Call' section below for the semantics. |
| // |
| // A task status update may be used for guaranteed delivery of some |
| // task-related information, e.g., task's health update. Such |
| // information may be shadowed by subsequent task status updates that |
| // do not preserve fields of the previously sent message. |
| message Update { |
| required TaskStatus status = 1; |
| } |
| |
| // EXPERIMENTAL. |
| // |
| // Received when there is an operation status update generated by the master, |
| // agent, or resource provider. These updates are only sent to the framework |
| // for operations which had the operation ID set by the framework. It is the |
| // responsibility of the scheduler to explicitly acknowledge the receipt of a |
| // status update. |
| // See 'AcknowledgeOperationStatus' in the 'Call' section below for the |
| // semantics. |
| message UpdateOperationStatus { |
| required OperationStatus status = 1; |
| } |
| |
| // Received when a custom message generated by the executor is |
| // forwarded by the master. Note that this message is not |
| // interpreted by Mesos and is only forwarded (without reliability |
| // guarantees) to the scheduler. It is up to the executor to retry |
| // if the message is dropped for any reason. |
| message Message { |
| required SlaveID slave_id = 1; |
| required ExecutorID executor_id = 2; |
| required bytes data = 3; |
| } |
| |
| // Received when a slave is removed from the cluster (e.g., failed |
| // health checks) or when an executor is terminated. Note that this |
| // event coincides with receipt of terminal UPDATE events for any |
| // active tasks belonging to the slave or executor and receipt of |
| // 'Rescind' events for any outstanding offers belonging to the |
| // slave. Note that there is no guaranteed order between the |
| // 'Failure', 'Update' and 'Rescind' events when a slave or executor |
| // is removed. |
| // TODO(vinod): Consider splitting the lost slave and terminated |
| // executor into separate events and ensure it's reliably generated. |
| message Failure { |
| optional SlaveID slave_id = 1; |
| |
| // If this was just a failure of an executor on a slave then |
| // 'executor_id' will be set and possibly 'status' (if we were |
| // able to determine the exit status). |
| optional ExecutorID executor_id = 2; |
| |
| // On Posix, `status` corresponds to termination information in the |
| // `stat_loc` area returned from a `waitpid` call. On Windows, `status` |
| // is obtained via calling the `GetExitCodeProcess()` function. For |
| // messages coming from Posix agents, schedulers need to apply |
| // `WEXITSTATUS` family macros or equivalent transformations to obtain |
| // exit codes. |
| // |
| // TODO(alexr): Consider unifying Windows and Posix behavior by returning |
| // exit code here, see MESOS-7241. |
| optional int32 status = 3; |
| } |
| |
| // Received when there is an unrecoverable error in the scheduler (e.g., |
| // scheduler failed over, rate limiting, authorization errors, etc.). The |
| // scheduler should abort on receiving this event. |
| message Error { |
| required string message = 1; |
| } |
| |
| // Type of the event, indicates which optional field below should be |
| // present if that type has a nested message definition. |
| // Enum fields should be optional, see: MESOS-4997. |
| optional Type type = 1; |
| |
| optional Subscribed subscribed = 2; |
| optional Offers offers = 3; |
| optional InverseOffers inverse_offers = 9; |
| optional Rescind rescind = 4; |
| optional RescindInverseOffer rescind_inverse_offer = 10; |
| optional Update update = 5; |
| optional UpdateOperationStatus update_operation_status = 11; |
| optional Message message = 6; |
| optional Failure failure = 7; |
| optional Error error = 8; |
| } |
| |
| |
| /** |
| * NOTE: After resolution of MESOS-9648, this message is not currently used by |
| * Mesos. It is left in place in anticipation of future use. |
| * |
| * Synchronous responses for calls made to the scheduler API. |
| * |
| * The 'type' field indicates which response payload field is present. |
| * The only response type declared besides UNKNOWN is |
| * RECONCILE_OPERATIONS, which is deprecated together with its |
| * 'reconcile_operations' payload (field 2). |
| */ |
| message Response { |
| // Each of the responses of type `FOO` corresponds to `Foo` message below. |
| enum Type { |
| UNKNOWN = 0; |
| |
| // DEPRECATED. |
| // |
| // See 'ReconcileOperations' below. |
| RECONCILE_OPERATIONS = 1 [deprecated = true]; |
| } |
| |
| // DEPRECATED. |
| message ReconcileOperations { |
| repeated OperationStatus operation_statuses = 1; |
| } |
| |
| optional Type type = 1; |
| |
| // DEPRECATED. |
| optional ReconcileOperations reconcile_operations = 2; |
| } |
| |
| |
| /** |
| * Scheduler call API. |
| * |
| * Like Event, a Call is described using the standard protocol buffer |
| * "union" trick (see above). |
| * |
| * 'framework_id' (field 1) identifies the caller and 'type' (field 2) |
| * indicates which of the optional payload fields declared at the end |
| * of this message ('subscribe', 'accept', 'decline', ...) should be |
| * present. The UNKNOWN and TEARDOWN types have no payload field. |
| * |
| * NOTE: The numbers of the 'Type' enum values and the numbers of the |
| * payload fields are assigned independently and do not generally |
| * coincide (e.g., ACCEPT = 3 while 'accept' is field 4, and REVIVE = 5 |
| * while 'revive' is field 15), so a payload field must never be |
| * looked up by the numeric enum value. |
| */ |
| message Call { |
| // Possible call types, followed by message definitions if |
| // applicable. |
| enum Type { |
| // See comments above on `Event::Type` for more details on this enum value. |
| UNKNOWN = 0; |
| |
| SUBSCRIBE = 1; // See 'Subscribe' below. |
| TEARDOWN = 2; // Shuts down all tasks/executors and removes framework. |
| ACCEPT = 3; // See 'Accept' below. |
| DECLINE = 4; // See 'Decline' below. |
| ACCEPT_INVERSE_OFFERS = 13; // See 'AcceptInverseOffers' below. |
| DECLINE_INVERSE_OFFERS = 14; // See 'DeclineInverseOffers' below. |
| REVIVE = 5; // Removes any previous filters set via ACCEPT or DECLINE; see 'Revive' below. |
| KILL = 6; // See 'Kill' below. |
| SHUTDOWN = 7; // See 'Shutdown' below. |
| ACKNOWLEDGE = 8; // See 'Acknowledge' below. |
| ACKNOWLEDGE_OPERATION_STATUS = 15; // See 'AcknowledgeOperationStatus' below. |
| RECONCILE = 9; // See 'Reconcile' below. |
| RECONCILE_OPERATIONS = 16; // See 'ReconcileOperations' below. |
| MESSAGE = 10; // See 'Message' below. |
| REQUEST = 11; // See 'Request' below. |
| SUPPRESS = 12; // Inform master to stop sending offers to the framework; see 'Suppress' below. |
| UPDATE_FRAMEWORK = 17; // See 'UpdateFramework' below. |
| |
| // TODO(benh): Consider adding an 'ACTIVATE' and 'DEACTIVATE' for |
| // already subscribed frameworks as a way of stopping offers from |
| // being generated and other events from being sent by the master. |
| // Note that this functionality existed originally to support |
| // SchedulerDriver::abort which was only necessary to handle |
| // exceptions getting thrown from within Scheduler callbacks, |
| // something that is not an issue with the Event/Call API. |
| } |
| |
| // Subscribes the scheduler with the master to receive events. A |
| // scheduler must send other calls only after it has received the |
| // SUBSCRIBED event. |
| message Subscribe { |
| // See the comments below on 'framework_id' on the semantics for |
| // 'framework_info.id'. |
| required FrameworkInfo framework_info = 1; |
| |
| // NOTE: 'force' field is not present in v1/scheduler.proto because it is |
| // only used by the scheduler driver. The driver sets it to true when the |
| // scheduler reregisters for the first time after a failover. Once |
| // reregistered all subsequent re-registration attempts (e.g., due to ZK |
| // blip) will have 'force' set to false. This is important because master |
| // uses this field to know when it needs to send FrameworkRegisteredMessage |
| // vs FrameworkReregisteredMessage. |
| optional bool force = 2; |
| |
| // List of suppressed roles for which the framework does not wish to be |
| // offered resources. The framework can decide to suppress all or a subset |
| // of roles the framework (re)registers as. |
| // |
| // Note: This field is not set by scheduler driver, so will always be |
| // empty. It is added here for transformation from `v1::Call::Subscribe`. |
| repeated string suppressed_roles = 3; |
| } |
| |
| // Accepts an offer, performing the specified operations |
| // in a sequential manner. |
| // |
| // E.g. Launch a task with a newly reserved persistent volume: |
| // |
| // Accept { |
| // offer_ids: [ ... ] |
| // operations: [ |
| // { type: RESERVE, |
| // reserve: { resources: [ disk(role):2 ] } } |
| // { type: CREATE, |
| // create: { volumes: [ disk(role):1+persistence ] } } |
| // { type: LAUNCH, |
| // launch: { task_infos ... disk(role):1;disk(role):1+persistence } } |
| // ] |
| // } |
| // |
| // NOTE: Any of the offer's resources not used in the `Accept` call |
| // (e.g., to launch a task) are considered unused and might be |
| // reoffered to other frameworks. In other words, the same `OfferID` |
| // cannot be used in more than one `Accept` call. |
| // NOTE: All offers must belong to the same agent. |
| message Accept { |
| repeated OfferID offer_ids = 1; |
| repeated Offer.Operation operations = 2; |
| optional Filters filters = 3; |
| } |
| |
| // Declines an offer, signaling the master to potentially reoffer |
| // the resources to a different framework. Note that this is the same |
| // as sending an Accept call with no operations. See comments on |
| // top of 'Accept' for semantics. |
| message Decline { |
| repeated OfferID offer_ids = 1; |
| optional Filters filters = 2; |
| } |
| |
| // Accepts an inverse offer. Inverse offers should only be accepted |
| // if the resources in the offer can be safely evacuated before the |
| // provided unavailability. |
| message AcceptInverseOffers { |
| repeated OfferID inverse_offer_ids = 1; |
| optional Filters filters = 2; |
| } |
| |
| // Declines an inverse offer. Inverse offers should be declined if |
| // the resources in the offer might not be safely evacuated before |
| // the provided unavailability. |
| message DeclineInverseOffers { |
| repeated OfferID inverse_offer_ids = 1; |
| optional Filters filters = 2; |
| } |
| |
| // Revive offers for the specified roles. If `roles` is empty, |
| // the `REVIVE` call will revive offers for all of the roles |
| // the framework is currently subscribed to. |
| message Revive { |
| repeated string roles = 1; |
| } |
| |
| // Kills a specific task. If the scheduler has a custom executor, |
| // the kill is forwarded to the executor and it is up to the |
| // executor to kill the task and send a TASK_KILLED (or TASK_FAILED) |
| // update. Note that Mesos releases the resources for a task once it |
| // receives a terminal update (See TaskState in mesos.proto) for it. |
| // If the task is unknown to the master, a TASK_LOST update is |
| // generated. |
| // |
| // If a task within a task group is killed before the group is |
| // delivered to the executor, all tasks in the task group are |
| // killed. When a task group has been delivered to the executor, |
| // it is up to the executor to decide how to deal with the kill. |
| // Note: The default Mesos executor will currently kill all the |
| // tasks in the task group if it gets a kill for any task. |
| message Kill { |
| required TaskID task_id = 1; |
| optional SlaveID slave_id = 2; |
| |
| // If set, overrides any previously specified kill policy for this task. |
| // This includes 'TaskInfo.kill_policy' and 'Executor.kill.kill_policy'. |
| // Can be used to forcefully kill a task which is already being killed. |
| optional KillPolicy kill_policy = 3; |
| } |
| |
| // Shuts down a custom executor. When the executor gets a shutdown |
| // event, it is expected to kill all its tasks (and send TASK_KILLED |
| // updates) and terminate. If the executor doesn't terminate within |
| // a certain timeout (configurable via |
| // '--executor_shutdown_grace_period' slave flag), the slave will |
| // forcefully destroy the container (executor and its tasks) and |
| // transition its active tasks to TASK_LOST. |
| message Shutdown { |
| required ExecutorID executor_id = 1; |
| required SlaveID slave_id = 2; |
| } |
| |
| // Acknowledges the receipt of status update. Schedulers are |
| // responsible for explicitly acknowledging the receipt of status |
| // updates that have 'Update.status().uuid()' field set. Such status |
| // updates are retried by the slave until they are acknowledged by |
| // the scheduler. |
| message Acknowledge { |
| required SlaveID slave_id = 1; |
| required TaskID task_id = 2; |
| required bytes uuid = 3; |
| } |
| |
| // EXPERIMENTAL. |
| // |
| // Acknowledges the receipt of an operation status update. Schedulers |
| // are responsible for explicitly acknowledging the receipt of updates |
| // which have the 'UpdateOperationStatus.status().uuid()' field set. |
| // Such status updates are retried by the agent or resource provider until |
| // they are acknowledged by the scheduler. |
| message AcknowledgeOperationStatus { |
| // If the operation affects resources that belong to a SLRP, both |
| // `slave_id` and `resource_provider_id` have to be set. |
| // |
| // If the operation affects resources that belong to a SERP, only |
| // `resource_provider_id` has to be set. |
| optional SlaveID slave_id = 1; |
| optional ResourceProviderID resource_provider_id = 2; |
| |
| required bytes uuid = 3; |
| required OperationID operation_id = 4; |
| } |
| |
| // Allows the scheduler to query the status for non-terminal tasks. |
| // This causes the master to send back the latest task status for |
| // each task in 'tasks', if possible. Tasks that are no longer known |
| // will result in a TASK_LOST, TASK_UNKNOWN, or TASK_UNREACHABLE update. |
| // If 'tasks' is empty, then the master will send the latest status |
| // for each task currently known. |
| message Reconcile { |
| // TODO(vinod): Support arbitrary queries rather than just the state of tasks. |
| message Task { |
| required TaskID task_id = 1; |
| optional SlaveID slave_id = 2; |
| } |
| |
| repeated Task tasks = 1; |
| } |
| |
| // EXPERIMENTAL. |
| // |
| // Allows the scheduler to query the status of operations. This causes the |
| // master to send back the latest status for each operation in 'operations', |
| // if possible. If 'operations' is empty, then the master will send the |
| // latest status for each operation currently known. |
| message ReconcileOperations { |
| message Operation { |
| required OperationID operation_id = 1; |
| optional SlaveID slave_id = 2; |
| optional ResourceProviderID resource_provider_id = 3; |
| } |
| |
| repeated Operation operations = 1; |
| } |
| |
| // Sends arbitrary binary data to the executor. Note that Mesos |
| // neither interprets this data nor makes any guarantees about the |
| // delivery of this message to the executor. |
| message Message { |
| required SlaveID slave_id = 1; |
| required ExecutorID executor_id = 2; |
| required bytes data = 3; |
| } |
| |
| // Requests a specific set of resources from Mesos's allocator. If |
| // the allocator has support for this, corresponding offers will be |
| // sent asynchronously via the OFFERS event(s). |
| // |
| // NOTE: The built-in hierarchical allocator doesn't have support |
| // for this call and hence simply ignores it. |
| message Request { |
| repeated mesos.Request requests = 1; |
| } |
| |
| // Suppress offers for the specified roles. If `roles` is empty, |
| // the `SUPPRESS` call will suppress offers for all of the roles |
| // the framework is currently subscribed to. |
| message Suppress { |
| repeated string roles = 1; |
| } |
| |
| // Updates the FrameworkInfo. All fields can be updated except for: |
| // |
| // * FrameworkInfo.checkpoint |
| // * FrameworkInfo.principal |
| // * FrameworkInfo.user |
| // |
| // The call returns after the update is either applied completely or |
| // not applied at all. No incomplete updates occur. |
| // |
| // The HTTP response codes specific to this call are: |
| // |
| // * 200 OK: update operation was successfully completed. |
| // * 400 Bad Request: the requested update is not valid. |
| // * 403 Forbidden: framework is not authorized to use some entities |
| // requested by the update (e.g. not authorized to use some of the |
| // supplied roles). |
| // * 409 Conflict: framework disappeared while this call was being processed |
| // (example: the framework was removed by a concurrent TEARDOWN call). |
| // |
| message UpdateFramework { |
| required FrameworkInfo framework_info = 1; |
| |
| // List of suppressed roles for which the framework does not wish to be |
| // offered resources. The framework can decide to suppress all or a subset |
| // of roles provided in the new `framework_info`. |
| repeated string suppressed_roles = 2; |
| } |
| |
| // Identifies who generated this call. Master assigns a framework id |
| // when a new scheduler subscribes for the first time. Once assigned, |
| // the scheduler must set the 'framework_id' here and within its |
| // FrameworkInfo (in any further 'Subscribe' calls). This allows the |
| // master to identify a scheduler correctly across disconnections, |
| // failovers, etc. |
| optional FrameworkID framework_id = 1; |
| |
| // Type of the call, indicates which optional field below should be |
| // present if that type has a nested message definition. |
| // See comments on `Event::Type` above on the reasoning behind this field |
| // being optional. |
| optional Type type = 2; |
| |
| optional Subscribe subscribe = 3; |
| optional Accept accept = 4; |
| optional Decline decline = 5; |
| optional AcceptInverseOffers accept_inverse_offers = 13; |
| optional DeclineInverseOffers decline_inverse_offers = 14; |
| optional Revive revive = 15; |
| optional Kill kill = 6; |
| optional Shutdown shutdown = 7; |
| optional Acknowledge acknowledge = 8; |
| optional AcknowledgeOperationStatus acknowledge_operation_status = 17; |
| optional Reconcile reconcile = 9; |
| optional ReconcileOperations reconcile_operations = 18; |
| optional Message message = 10; |
| optional Request request = 11; |
| optional Suppress suppress = 16; |
| optional UpdateFramework update_framework = 19; |
| } |