blob: 0049e1383f50574c3dad6a29b91811001694e82c [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
import "mesos/mesos.proto";
package mesos.scheduler;
option java_package = "org.apache.mesos.scheduler";
option java_outer_classname = "Protos";
/**
* Scheduler event API.
*
* An event is described using the standard protocol buffer "union"
* trick, see:
* https://developers.google.com/protocol-buffers/docs/techniques#union.
*/
message Event {
// Possible event types, followed by message definitions if
// applicable. Values start at 1; no zero ("unknown") value is
// declared.
enum Type {
SUBSCRIBED = 1; // See 'Subscribed' below.
OFFERS = 2; // See 'Offers' below.
RESCIND = 3; // See 'Rescind' below.
UPDATE = 4; // See 'Update' below.
MESSAGE = 5; // See 'Message' below.
FAILURE = 6; // See 'Failure' below.
ERROR = 7; // See 'Error' below.
// Periodic message sent by the Mesos master according to
// 'Subscribed.heartbeat_interval_seconds'. If the scheduler does
// not receive any events (including heartbeats) for an extended
// period of time (e.g., 5 x heartbeat_interval_seconds), there is
// likely a network partition. In such a case the scheduler should
// close the existing subscription connection and resubscribe
// using a backoff strategy.
//
// HEARTBEAT has no nested message and no payload field below: such
// an event carries only 'type'.
HEARTBEAT = 8;
}
// First event received when the scheduler subscribes.
message Subscribed {
// ID of the framework that was subscribed. See the comments on
// 'framework_id' in 'Call' for how the scheduler must use it.
required FrameworkID framework_id = 1;
// This value will be set if the master is sending heartbeats. See
// the comment above on 'HEARTBEAT' for more details.
optional double heartbeat_interval_seconds = 2;
}
// Received whenever there are new resources that are offered to the
// scheduler or resources requested back from the scheduler. Each
// offer corresponds to a set of resources on a slave. Until the
// scheduler accepts or declines an offer the resources are
// considered allocated to the scheduler. Accepting or Declining an
// inverse offer informs the allocator of the scheduler's ability to
// release the resources without violating an SLA.
message Offers {
// Resources being offered to the scheduler.
repeated Offer offers = 1;
// Inverse offers, i.e., resources requested back from the
// scheduler.
repeated InverseOffer inverse_offers = 2;
}
// Received when a particular offer is no longer valid (e.g., the
// slave corresponding to the offer has been removed) and hence
// needs to be rescinded. Any future calls ('Accept' / 'Decline') made
// by the scheduler regarding this offer will be invalid.
message Rescind {
// ID of the offer that is no longer valid.
required OfferID offer_id = 1;
}
// Received whenever there is a status update that is generated by
// the executor or slave or master. Status updates should be used by
// executors to reliably communicate the status of the tasks that
// they manage. It is crucial that a terminal update (see TaskState
// in mesos.proto) is sent by the executor as soon as the task
// terminates, in order for Mesos to release the resources allocated
// to the task. It is also the responsibility of the scheduler to
// explicitly acknowledge the receipt of a status update. See
// 'Acknowledge' in the 'Call' section below for the semantics.
message Update {
// The task status carried by this update.
required TaskStatus status = 1;
}
// Received when a custom message generated by the executor is
// forwarded by the master. Note that this message is not
// interpreted by Mesos and is only forwarded (without reliability
// guarantees) to the scheduler. It is up to the executor to retry
// if the message is dropped for any reason.
message Message {
// NOTE(review): presumably the slave on which the sending executor
// runs -- confirm.
required SlaveID slave_id = 1;
// Executor that generated the message.
required ExecutorID executor_id = 2;
// Opaque payload; not interpreted by Mesos.
required bytes data = 3;
}
// Received when a slave is removed from the cluster (e.g., failed
// health checks) or when an executor is terminated. Note that this
// event coincides with receipt of terminal UPDATE events for any
// active tasks belonging to the slave or executor and receipt of
// 'Rescind' events for any outstanding offers belonging to the
// slave. Note that there is no guaranteed order between the
// 'Failure', 'Update' and 'Rescind' events when a slave or executor
// is removed.
// TODO(vinod): Consider splitting the lost slave and terminated
// executor into separate events and ensure it's reliably generated.
message Failure {
// Slave concerned by the failure.
// NOTE(review): presumably the removed slave, or the slave hosting
// the terminated executor -- confirm.
optional SlaveID slave_id = 1;
// If this was just a failure of an executor on a slave then
// 'executor_id' will be set and possibly 'status' (if we were
// able to determine the exit status).
optional ExecutorID executor_id = 2;
// Exit status of the executor, when it could be determined.
optional int32 status = 3;
}
// Received when there is an unrecoverable error in the scheduler (e.g.,
// scheduler failed over, rate limiting, authorization errors etc.). The
// scheduler should abort on receiving this event.
message Error {
// Description of the error.
required string message = 1;
}
// Type of the event, indicates which optional field below should be
// present if that type has a nested message definition. HEARTBEAT
// has no corresponding field.
required Type type = 1;
optional Subscribed subscribed = 2;
optional Offers offers = 3;
optional Rescind rescind = 4;
optional Update update = 5;
optional Message message = 6;
optional Failure failure = 7;
optional Error error = 8;
}
/**
* Scheduler call API.
*
* Like Event, a Call is described using the standard protocol buffer
* "union" trick (see above).
*/
message Call {
// Possible call types, followed by message definitions if
// applicable. TEARDOWN, REVIVE and SUPPRESS have no nested message
// and no payload field below: such calls carry only 'type' and
// 'framework_id'. Values start at 1; no zero ("unknown") value is
// declared.
enum Type {
SUBSCRIBE = 1; // See 'Subscribe' below.
TEARDOWN = 2; // Shuts down all tasks/executors and removes framework.
ACCEPT = 3; // See 'Accept' below.
DECLINE = 4; // See 'Decline' below.
REVIVE = 5; // Removes any previous filters set via ACCEPT or DECLINE.
KILL = 6; // See 'Kill' below.
SHUTDOWN = 7; // See 'Shutdown' below.
ACKNOWLEDGE = 8; // See 'Acknowledge' below.
RECONCILE = 9; // See 'Reconcile' below.
MESSAGE = 10; // See 'Message' below.
REQUEST = 11; // See 'Request' below.
SUPPRESS = 12; // Inform master to stop sending offers to the framework.
// TODO(benh): Consider adding an 'ACTIVATE' and 'DEACTIVATE' for
// already subscribed frameworks as a way of stopping offers from
// being generated and other events from being sent by the master.
// Note that this functionality existed originally to support
// SchedulerDriver::abort which was only necessary to handle
// exceptions getting thrown from within Scheduler callbacks,
// something that is not an issue with the Event/Call API.
}
// Subscribes the scheduler with the master to receive events. A
// scheduler must send other calls only after it has received the
// SUBSCRIBED event.
message Subscribe {
// See the comments below on 'framework_id' on the semantics for
// 'framework_info.id'.
required FrameworkInfo framework_info = 1;
// NOTE: 'force' field is not present in v1/scheduler.proto because it is
// only used by the scheduler driver. The driver sets it to true when the
// scheduler re-registers for the first time after a failover. Once
// re-registered all subsequent re-registration attempts (e.g., due to ZK
// blip) will have 'force' set to false. This is important because master
// uses this field to know when it needs to send FrameworkRegisteredMessage
// vs FrameworkReregisteredMessage.
optional bool force = 2;
}
// Accepts an offer, performing the specified operations
// in a sequential manner.
//
// E.g. Launch a task with a newly reserved persistent volume:
//
// Accept {
// offer_ids: [ ... ]
// operations: [
// { type: RESERVE,
// reserve: { resources: [ disk(role):2 ] } }
// { type: CREATE,
// create: { volumes: [ disk(role):1+persistence ] } }
// { type: LAUNCH,
// launch: { task_infos ... disk(role):1;disk(role):1+persistence } }
// ]
// }
//
// NOTE: Any of the offer's resources not used in the `Accept` call
// (e.g., to launch a task) are considered unused and might be
// reoffered to other frameworks. In other words, the same `OfferID`
// cannot be used in more than one `Accept` call.
// NOTE: All offers must belong to the same agent.
message Accept {
// Offers being accepted; all must belong to the same agent.
repeated OfferID offer_ids = 1;
// Operations to perform on the accepted offers, applied
// sequentially.
repeated Offer.Operation operations = 2;
// Optional filters. Filters set here can later be removed with a
// REVIVE call.
// NOTE(review): 'Filters' is presumably declared in mesos.proto;
// see its definition for the exact semantics.
optional Filters filters = 3;
}
// Declines an offer, signaling the master to potentially reoffer
// the resources to a different framework. Note that this is the
// same as sending an Accept call with no operations. See comments
// on top of 'Accept' for semantics.
message Decline {
// Offers being declined.
repeated OfferID offer_ids = 1;
// Optional filters. Filters set here can later be removed with a
// REVIVE call.
optional Filters filters = 2;
}
// Kills a specific task. If the scheduler has a custom executor,
// the kill is forwarded to the executor and it is up to the
// executor to kill the task and send a TASK_KILLED (or TASK_FAILED)
// update. Note that Mesos releases the resources for a task once it
// receives a terminal update (See TaskState in mesos.proto) for it.
// If the task is unknown to the master, a TASK_LOST update is
// generated.
message Kill {
// Task to kill.
required TaskID task_id = 1;
// NOTE(review): presumably the slave on which the task runs, when
// known to the scheduler -- confirm.
optional SlaveID slave_id = 2;
}
// Shuts down a custom executor. When the executor gets a shutdown
// event, it is expected to kill all its tasks (and send TASK_KILLED
// updates) and terminate. If the executor doesn't terminate within
// a certain timeout (configurable via
// '--executor_shutdown_grace_period' slave flag), the slave will
// forcefully destroy the container (executor and its tasks) and
// transition its active tasks to TASK_LOST.
message Shutdown {
// Executor to shut down.
required ExecutorID executor_id = 1;
// NOTE(review): presumably the slave on which that executor runs
// -- confirm.
required SlaveID slave_id = 2;
}
// Acknowledges the receipt of status update. Schedulers are
// responsible for explicitly acknowledging the receipt of status
// updates that have 'Update.status().uuid()' field set. Such status
// updates are retried by the slave until they are acknowledged by
// the scheduler.
message Acknowledge {
// NOTE(review): presumably the slave and task taken from the
// status update being acknowledged -- confirm.
required SlaveID slave_id = 1;
required TaskID task_id = 2;
// UUID of the status update being acknowledged (see
// 'Update.status().uuid()' above).
required bytes uuid = 3;
}
// Allows the scheduler to query the status for non-terminal tasks.
// This causes the master to send back the latest task status for
// each task in 'tasks', if possible. Tasks that are no longer known
// will result in a TASK_LOST update. If 'tasks' is empty, then
// the master will send the latest status for each task currently
// known.
message Reconcile {
// TODO(vinod): Support arbitrary queries rather than just state of
// tasks.
message Task {
// Task whose latest status is requested.
required TaskID task_id = 1;
// NOTE(review): presumably the slave the scheduler believes the
// task is on -- confirm.
optional SlaveID slave_id = 2;
}
// Tasks to reconcile; leave empty to get the latest status of
// every task currently known.
repeated Task tasks = 1;
}
// Sends arbitrary binary data to the executor. Note that Mesos
// neither interprets this data nor makes any guarantees about the
// delivery of this message to the executor.
message Message {
// NOTE(review): presumably the slave on which the target executor
// runs -- confirm.
required SlaveID slave_id = 1;
// Executor the data is sent to.
required ExecutorID executor_id = 2;
// Opaque payload; not interpreted by Mesos.
required bytes data = 3;
}
// Requests a specific set of resources from Mesos's allocator. If
// the allocator has support for this, corresponding offers will be
// sent asynchronously via the OFFERS event(s).
//
// NOTE: The built-in hierarchical allocator doesn't have support
// for this call and hence simply ignores it.
message Request {
// The type is package-qualified because an unqualified 'Request'
// in this scope would resolve to this nested message itself.
repeated mesos.Request requests = 1;
}
// Identifies who generated this call. Master assigns a framework id
// when a new scheduler subscribes for the first time. Once assigned,
// the scheduler must set the 'framework_id' here and within its
// FrameworkInfo (in any further 'Subscribe' calls). This allows the
// master to identify a scheduler correctly across disconnections,
// failovers, etc.
optional FrameworkID framework_id = 1;
// Type of the call, indicates which optional field below should be
// present if that type has a nested message definition. TEARDOWN,
// REVIVE and SUPPRESS have no corresponding field.
required Type type = 2;
optional Subscribe subscribe = 3;
optional Accept accept = 4;
optional Decline decline = 5;
optional Kill kill = 6;
optional Shutdown shutdown = 7;
optional Acknowledge acknowledge = 8;
optional Reconcile reconcile = 9;
optional Message message = 10;
optional Request request = 11;
}