// blob: d51350ba9627fea964e5c440b4153c192b4ff899
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto2";
import "mesos/mesos.proto";
import "mesos/maintenance/maintenance.proto";
import "mesos/quota/quota.proto";
import "mesos/scheduler/scheduler.proto";
// Protobuf package; definitions in this file are referenced from other
// files as `mesos.master.*`.
package mesos.master;
// Enables arena allocation for the generated C++ code.
option cc_enable_arenas = true;
// Java package and outer wrapper class used for the generated Java code,
// i.e. `org.apache.mesos.master.Protos`.
option java_package = "org.apache.mesos.master";
option java_outer_classname = "Protos";
/**
* Calls that can be sent to the master API.
*
* A call is described using the standard protocol buffer "union"
* trick, see
* https://developers.google.com/protocol-buffers/docs/techniques#union.
*/
message Call {
  // Identifies which call is being made.
  //
  // If a call of type `Call::FOO` requires additional parameters they can be
  // included in the corresponding `Call::Foo` message. Similarly, if a call
  // receives a synchronous response it will be returned as a `Response`
  // message of type `Response::FOO`. Currently all calls except
  // `Call::SUBSCRIBE` receive synchronous responses; `Call::SUBSCRIBE`
  // returns a streaming response of `Event`.
  enum Type {
    UNKNOWN = 0;

    // Retrieves the master's health status.
    GET_HEALTH = 1;
    // Retrieves the master's flag configuration.
    GET_FLAGS = 2;
    // Retrieves the master's version information.
    GET_VERSION = 3;
    // See 'GetMetrics' below.
    GET_METRICS = 4;

    // Retrieves the master's logging level.
    GET_LOGGING_LEVEL = 5;
    // See 'SetLoggingLevel' below.
    SET_LOGGING_LEVEL = 6;

    // See 'ListFiles' below.
    LIST_FILES = 7;
    // See 'ReadFile' below.
    READ_FILE = 8;

    // Retrieves the master's state; see `Response.GetState`.
    GET_STATE = 9;
    // Retrieves the information about agents; see `Response.GetAgents`.
    GET_AGENTS = 10;
    // Retrieves the information about frameworks; see
    // `Response.GetFrameworks`.
    GET_FRAMEWORKS = 11;
    // Retrieves the information about all executors.
    GET_EXECUTORS = 12;
    // Retrieves the information about known operations.
    GET_OPERATIONS = 33;
    // Retrieves the information about all known tasks.
    GET_TASKS = 13;
    // Retrieves the information about roles.
    GET_ROLES = 14;

    // Retrieves the information about role weights.
    GET_WEIGHTS = 15;
    // See 'UpdateWeights' below.
    UPDATE_WEIGHTS = 16;

    // Retrieves the master's information.
    GET_MASTER = 17;

    // Subscribes to the master in order to receive a stream of `Event`s.
    SUBSCRIBE = 18;

    // See 'ReserveResources' below.
    RESERVE_RESOURCES = 19;
    // See 'UnreserveResources' below.
    UNRESERVE_RESOURCES = 20;

    // See 'CreateVolumes' below.
    CREATE_VOLUMES = 21;
    // See 'DestroyVolumes' below.
    DESTROY_VOLUMES = 22;
    // See 'GrowVolume' below.
    GROW_VOLUME = 34;
    // See 'ShrinkVolume' below.
    SHRINK_VOLUME = 35;

    // Retrieves the cluster's maintenance status.
    GET_MAINTENANCE_STATUS = 23;
    // Retrieves the cluster's maintenance schedule.
    GET_MAINTENANCE_SCHEDULE = 24;
    // See 'UpdateMaintenanceSchedule' below.
    UPDATE_MAINTENANCE_SCHEDULE = 25;
    // See 'StartMaintenance' below.
    START_MAINTENANCE = 26;
    // See 'StopMaintenance' below.
    STOP_MAINTENANCE = 27;

    // See 'DrainAgent' below.
    DRAIN_AGENT = 37;
    // See 'DeactivateAgent' below.
    DEACTIVATE_AGENT = 38;
    // See 'ReactivateAgent' below.
    REACTIVATE_AGENT = 39;

    // Retrieves the cluster's configured quotas; see `Response.GetQuota`.
    GET_QUOTA = 28;
    // See 'UpdateQuota' below.
    UPDATE_QUOTA = 36;

    // `SET_QUOTA` and `REMOVE_QUOTA` are deprecated in favor
    // of `UPDATE_QUOTA`. To reset a role's quota back to the default
    // (no guarantees and no limits), simply update the role's quota
    // with empty guarantees and limits.
    SET_QUOTA = 29 [deprecated = true];
    REMOVE_QUOTA = 30 [deprecated = true];

    // See 'Teardown' below.
    TEARDOWN = 31;
    // See 'MarkAgentGone' below.
    MARK_AGENT_GONE = 32;
  }

  // Provides a snapshot of the current metrics tracked by the master.
  message GetMetrics {
    // If set, `timeout` determines the maximum amount of time the API will
    // take to respond. If the timeout is exceeded, some metrics may not be
    // included in the response.
    optional DurationInfo timeout = 1;
  }

  // Sets the logging verbosity level for a specified duration. Mesos uses
  // [glog](https://github.com/google/glog) for logging. The library only uses
  // verbose logging which means nothing will be output unless the verbosity
  // level is set (by default it's 0, libprocess uses levels 1, 2, and 3).
  message SetLoggingLevel {
    // The verbosity level.
    required uint32 level = 1;

    // The duration to keep the verbosity level toggled. After this duration,
    // the verbosity level of the log reverts to the original level.
    required DurationInfo duration = 2;
  }

  // Provides the file listing for a directory.
  message ListFiles {
    required string path = 1;
  }

  // Reads data from a file.
  message ReadFile {
    // The path of the file.
    required string path = 1;

    // Initial offset in file to start reading from.
    required uint64 offset = 2;

    // The maximum number of bytes to read. The read length is capped at 16
    // memory pages.
    optional uint64 length = 3;
  }

  // Parameters of an `UPDATE_WEIGHTS` call.
  message UpdateWeights {
    repeated WeightInfo weight_infos = 1;
  }

  // Reserve resources dynamically on a specific agent.
  message ReserveResources {
    required SlaveID slave_id = 1;

    // NOTE(review): the meaning of `source` is not documented in this file;
    // confirm against the master's reservation handling before relying on it.
    repeated Resource source = 3;

    repeated Resource resources = 2;
  }

  // Unreserve resources dynamically on a specific agent.
  message UnreserveResources {
    required SlaveID slave_id = 1;
    repeated Resource resources = 2;
  }

  // Create persistent volumes on reserved resources. The request is forwarded
  // asynchronously to the Mesos agent where the reserved resources are
  // located. That asynchronous message may not be delivered or creating the
  // volumes at the agent might fail. Volume creation can be verified by
  // sending a `GET_VOLUMES` call.
  //
  // NOTE(review): `Call.Type` above defines no `GET_VOLUMES` value; confirm
  // which call is meant for verification.
  message CreateVolumes {
    required SlaveID slave_id = 1;
    repeated Resource volumes = 2;
  }

  // Destroy persistent volumes. The request is forwarded asynchronously to
  // the Mesos agent where the reserved resources are located. That
  // asynchronous message may not be delivered or destroying the volumes at
  // the agent might fail. Volume deletion can be verified by sending a
  // `GET_VOLUMES` call.
  //
  // NOTE(review): `Call.Type` above defines no `GET_VOLUMES` value; confirm
  // which call is meant for verification.
  message DestroyVolumes {
    required SlaveID slave_id = 1;
    repeated Resource volumes = 2;
  }

  // Grow a volume by an additional disk resource.
  // NOTE: This is currently experimental and only for persistent volumes
  // created on ROOT/PATH disks.
  message GrowVolume {
    // `slave_id` must be set if `volume` is an agent-local resource, and must
    // be unset if `volume` is an external resource.
    optional SlaveID slave_id = 1;

    required Resource volume = 2;
    required Resource addition = 3;
  }

  // Shrink a volume by the size specified in the `subtract` field.
  // NOTE: This is currently experimental and only for persistent volumes
  // created on ROOT/PATH disks.
  message ShrinkVolume {
    // `slave_id` must be set if `volume` is an agent-local resource, and must
    // be unset if `volume` is an external resource.
    optional SlaveID slave_id = 1;

    required Resource volume = 2;

    // See comments in `Value.Scalar` for maximum precision supported.
    required Value.Scalar subtract = 3;
  }

  // Updates the cluster's maintenance schedule.
  message UpdateMaintenanceSchedule {
    required maintenance.Schedule schedule = 1;
  }

  // Starts the maintenance of the cluster; this would bring a set of machines
  // down.
  message StartMaintenance {
    repeated MachineID machines = 1;
  }

  // Stops the maintenance of the cluster; this would bring a set of machines
  // back up.
  message StopMaintenance {
    repeated MachineID machines = 1;
  }

  // EXPERIMENTAL.
  //
  // Marks an agent for automated draining of tasks.
  // This prevents further tasks from being launched on the agent, by
  // preventing offers from being sent for the agent (see 'DeactivateAgent'),
  // and also begins killing tasks running on the agent.
  message DrainAgent {
    required SlaveID slave_id = 1;

    // An upper bound for tasks with a KillPolicy.
    // If a task has a KillPolicy grace period greater than this value,
    // this value will be used instead. This allows the operator to limit
    // the maximum time it will take the agent to drain.
    //
    // NOTE: Grace periods start when the executor receives the associated
    // kill. If, for example, the agent is unreachable when this call is made,
    // tasks will still receive their full grace period to kill gracefully.
    optional DurationInfo max_grace_period = 2;

    // Whether or not this agent will be removed permanently from the
    // cluster when draining is complete. This transition is automatic
    // and does **NOT** require a separate call to `MarkAgentGone`.
    //
    // Compared to `MarkAgentGone`, which is used for unreachable agents,
    // marking agents gone after draining will respect kill policies.
    // To notify frameworks, tasks terminated during draining will return
    // a `TASK_GONE_BY_OPERATOR` status update instead of any other terminal
    // status. Executors will not need to account for this case, because
    // the terminal status update will be intercepted and modified by the
    // agent.
    optional bool mark_gone = 3 [default = false];
  }

  // EXPERIMENTAL.
  //
  // Turns off offers for a specific agent.
  // A deactivated agent will continue to run tasks and communicate statuses
  // with the master.
  message DeactivateAgent {
    required SlaveID slave_id = 1;
  }

  // EXPERIMENTAL.
  //
  // Turns on offers for a specific agent, which was previously drained or
  // deactivated.
  message ReactivateAgent {
    required SlaveID slave_id = 1;
  }

  // Updates quota given the provided quota configurations; these
  // configurations are applied in an all-or-nothing manner.
  message UpdateQuota {
    // Determines whether to skip the capacity validation. See
    // `quota.QuotaConfig` for more details.
    optional bool force = 1;

    repeated quota.QuotaConfig quota_configs = 2;
  }

  // Deprecated in favor of `UpdateQuota`.
  message SetQuota {
    required quota.QuotaRequest quota_request = 1;
  }

  // Deprecated in favor of `UpdateQuota`.
  message RemoveQuota {
    required string role = 1;
  }

  // Tears down a running framework by shutting down all tasks/executors and
  // removing the framework.
  message Teardown {
    required FrameworkID framework_id = 1;
  }

  // Mark an agent as gone. This can be used by an operator to assert
  // that the agent instance has failed and is never coming back (e.g.,
  // ephemeral instance from cloud provider). The master would shutdown
  // the agent and send 'TASK_GONE_BY_OPERATOR' updates for all the running
  // tasks. The persistent volumes/reservations on the agent won't be
  // accessible anymore.
  //
  // NOTE: It is possible that the tasks might still be running
  // if the operator's assertion was wrong and the agent was partitioned
  // away from the master. The agent would be shutdown when it tries to
  // reregister with the master when the partition heals.
  message MarkAgentGone {
    required SlaveID slave_id = 1;
  }

  // The kind of call; selects which of the parameter messages below (if any)
  // accompanies it.
  optional Type type = 1;

  // Per-call parameters, one field per `Call::Foo` message defined above.
  optional GetMetrics get_metrics = 2;
  optional SetLoggingLevel set_logging_level = 3;
  optional ListFiles list_files = 4;
  optional ReadFile read_file = 5;
  optional UpdateWeights update_weights = 6;
  optional ReserveResources reserve_resources = 7;
  optional UnreserveResources unreserve_resources = 8;
  optional CreateVolumes create_volumes = 9;
  optional DestroyVolumes destroy_volumes = 10;
  optional GrowVolume grow_volume = 18;
  optional ShrinkVolume shrink_volume = 19;
  optional UpdateMaintenanceSchedule update_maintenance_schedule = 11;
  optional StartMaintenance start_maintenance = 12;
  optional StopMaintenance stop_maintenance = 13;
  optional DrainAgent drain_agent = 21;
  optional DeactivateAgent deactivate_agent = 22;
  optional ReactivateAgent reactivate_agent = 23;
  optional UpdateQuota update_quota = 20;
  optional Teardown teardown = 16;
  optional MarkAgentGone mark_agent_gone = 17;

  // Deprecated in favor of `update_quota`.
  optional SetQuota set_quota = 14 [deprecated = true];
  optional RemoveQuota remove_quota = 15 [deprecated = true];
}
/**
* Synchronous responses for all calls (except Call::SUBSCRIBE) made to
* the master API.
*/
message Response {
  // Each of the responses of type `FOO` corresponds to `Foo` message below.
  enum Type {
    UNKNOWN = 0;

    // See 'GetHealth' below.
    GET_HEALTH = 1;
    // See 'GetFlags' below.
    GET_FLAGS = 2;
    // See 'GetVersion' below.
    GET_VERSION = 3;
    // See 'GetMetrics' below.
    GET_METRICS = 4;
    // See 'GetLoggingLevel' below.
    GET_LOGGING_LEVEL = 5;

    // See 'ListFiles' below.
    LIST_FILES = 6;
    // See 'ReadFile' below.
    READ_FILE = 7;

    // See 'GetState' below.
    GET_STATE = 8;
    // See 'GetAgents' below.
    GET_AGENTS = 9;
    // See 'GetFrameworks' below.
    GET_FRAMEWORKS = 10;
    // See 'GetExecutors' below.
    GET_EXECUTORS = 11;
    // See 'GetOperations' below.
    GET_OPERATIONS = 19;
    // See 'GetTasks' below.
    GET_TASKS = 12;
    // See 'GetRoles' below.
    GET_ROLES = 13;

    // See 'GetWeights' below.
    GET_WEIGHTS = 14;

    // See 'GetMaster' below.
    GET_MASTER = 15;

    // See 'GetMaintenanceStatus' below.
    GET_MAINTENANCE_STATUS = 16;
    // See 'GetMaintenanceSchedule' below.
    GET_MAINTENANCE_SCHEDULE = 17;

    // See 'GetQuota' below.
    GET_QUOTA = 18;
  }

  // `healthy` would be true if the master is healthy. Delayed responses are
  // also indicative of the poor health of the master.
  message GetHealth {
    required bool healthy = 1;
  }

  // Contains the flag configuration of the master.
  message GetFlags {
    repeated Flag flags = 1;
  }

  // Contains the version information of the master.
  message GetVersion {
    required VersionInfo version_info = 1;
  }

  // Contains a snapshot of the current metrics.
  message GetMetrics {
    repeated Metric metrics = 1;
  }

  // Contains the logging level of the master.
  message GetLoggingLevel {
    required uint32 level = 1;
  }

  // Contains the file listing (similar to `ls -l`) for a directory.
  message ListFiles {
    repeated FileInfo file_infos = 1;
  }

  // Contains the file data.
  message ReadFile {
    // The size of file (in bytes).
    required uint64 size = 1;

    required bytes data = 2;
  }

  // Contains full state of the master i.e. information about the tasks,
  // agents, frameworks and executors running in the cluster.
  message GetState {
    optional GetTasks get_tasks = 1;
    optional GetExecutors get_executors = 2;
    optional GetFrameworks get_frameworks = 3;
    optional GetAgents get_agents = 4;
  }

  // Lists the agents known to the master: registered agents plus agents
  // recovered from the registry that have not reregistered yet.
  message GetAgents {
    // Describes a single registered agent.
    message Agent {
      required SlaveInfo agent_info = 1;
      required bool active = 2;
      optional bool deactivated = 12;
      required string version = 3;

      optional string pid = 4;
      optional TimeInfo registered_time = 5;
      optional TimeInfo reregistered_time = 6;

      // Total resources (including oversubscribed resources) the agent
      // provides.
      repeated Resource total_resources = 7;

      repeated Resource allocated_resources = 8;
      repeated Resource offered_resources = 9;

      repeated SlaveInfo.Capability capabilities = 10;

      // A resource provider listed for this agent, together with the
      // provider's total resources.
      message ResourceProvider {
        required ResourceProviderInfo resource_provider_info = 1;
        repeated Resource total_resources = 2;
      }

      repeated ResourceProvider resource_providers = 11;

      optional DrainInfo drain_info = 13;
      optional TimeInfo estimated_drain_start_time = 14;
    }

    // Registered agents.
    repeated Agent agents = 1;

    // Agents which are recovered from registry but not reregistered yet.
    repeated SlaveInfo recovered_agents = 2;
  }

  // Information about all the frameworks known to the master at the current
  // time. Note that there might be frameworks unknown to the master running
  // on partitioned or unsubscribed agents.
  message GetFrameworks {
    // Describes a single framework.
    message Framework {
      required FrameworkInfo framework_info = 1;
      required bool active = 2;
      required bool connected = 3;

      // If true, this framework was previously subscribed but hasn't
      // yet re-subscribed after a master failover. Recovered frameworks
      // are only reported if one or more agents running a task or
      // executor for the framework have reregistered after master
      // failover.
      required bool recovered = 11;

      optional TimeInfo registered_time = 4;
      optional TimeInfo reregistered_time = 5;
      optional TimeInfo unregistered_time = 6;

      // NOTE: Offers, inverse offers, allocated/offered resources
      // and offer constraints are never reported via master API events.
      repeated Offer offers = 7;
      repeated InverseOffer inverse_offers = 8;
      repeated Resource allocated_resources = 9;
      repeated Resource offered_resources = 10;
      optional scheduler.OfferConstraints offer_constraints = 12;
    }

    // Frameworks that have subscribed with the master. Note that this
    // includes frameworks that are disconnected and in the process of
    // re-subscribing.
    repeated Framework frameworks = 1;

    // Frameworks that have been torn down.
    repeated Framework completed_frameworks = 2;

    // This field previously contained frameworks that previously
    // subscribed but haven't yet re-subscribed after a master failover.
    // As of Mesos 1.2, this field will always be empty; recovered
    // frameworks are now reported in the `frameworks` list with the
    // `recovered` field set to true.
    //
    // TODO(neilc): Remove this field in Mesos 2.0.
    repeated FrameworkInfo recovered_frameworks = 3 [deprecated = true];
  }

  // Lists information about all the executors known to the master at the
  // current time. Note that there might be executors unknown to the master
  // running on partitioned or unsubscribed agents.
  message GetExecutors {
    // An executor together with the ID of the agent it is associated with.
    message Executor {
      required ExecutorInfo executor_info = 1;
      required SlaveID slave_id = 2;
    }

    repeated Executor executors = 1;

    // As of Mesos 1.3.0, this field is deprecated and will always be
    // empty.
    //
    // TODO(neilc): Remove this field in Mesos 2.0.
    repeated Executor orphan_executors = 2 [deprecated = true];
  }

  // Lists information about all operations known to the master at the
  // current time.
  message GetOperations {
    repeated Operation operations = 1;
  }

  // Lists information about all the tasks known to the master at the current
  // time. Note that there might be tasks unknown to the master running on
  // partitioned or unsubscribed agents.
  message GetTasks {
    // Tasks that are enqueued on the master waiting (e.g., authorizing)
    // to be launched. This field is marked deprecated.
    repeated Task pending_tasks = 1 [deprecated = true];

    // Tasks that have been forwarded to the agent for launch. This
    // includes tasks that are staging or running; it also includes
    // tasks that have reached a terminal state but the terminal status
    // update has not yet been acknowledged by the scheduler.
    repeated Task tasks = 2;

    // Tasks that were running on agents that have become partitioned
    // from the master. If/when the agent is no longer partitioned,
    // tasks running on that agent will no longer be unreachable (they
    // will either be running or completed). Note that the master only
    // stores a limited number of unreachable tasks; information about
    // unreachable tasks is also not preserved across master failover.
    repeated Task unreachable_tasks = 5;

    // Tasks that have reached terminal state and have all their updates
    // acknowledged by the scheduler.
    repeated Task completed_tasks = 3;

    // As of Mesos 1.3.0, this field is deprecated and will always be
    // empty.
    //
    // TODO(neilc): Remove this field in Mesos 2.0.
    repeated Task orphan_tasks = 4 [deprecated = true];
  }

  // Provides information about every role that is on the role whitelist (if
  // enabled), has one or more registered frameworks or has a non-default
  // weight or quota.
  message GetRoles {
    repeated Role roles = 1;
  }

  // Provides the weight information about every role.
  message GetWeights {
    repeated WeightInfo weight_infos = 1;
  }

  // Contains the master's information.
  message GetMaster {
    optional MasterInfo master_info = 1;
    optional double start_time = 2;
    optional double elected_time = 3;
  }

  // Contains the cluster's maintenance status.
  message GetMaintenanceStatus {
    required maintenance.ClusterStatus status = 1;
  }

  // Contains the cluster's maintenance schedule.
  message GetMaintenanceSchedule {
    required maintenance.Schedule schedule = 1;
  }

  // Contains the cluster's configured quotas.
  message GetQuota {
    required quota.QuotaStatus status = 1;
  }

  // The kind of response; selects which of the payload fields below is
  // relevant.
  optional Type type = 1;

  // Per-response payloads, one field per `Response::Foo` message above.
  optional GetHealth get_health = 2;
  optional GetFlags get_flags = 3;
  optional GetVersion get_version = 4;
  optional GetMetrics get_metrics = 5;
  optional GetLoggingLevel get_logging_level = 6;
  optional ListFiles list_files = 7;
  optional ReadFile read_file = 8;
  optional GetState get_state = 9;
  optional GetAgents get_agents = 10;
  optional GetFrameworks get_frameworks = 11;
  optional GetExecutors get_executors = 12;
  optional GetOperations get_operations = 20;
  optional GetTasks get_tasks = 13;
  optional GetRoles get_roles = 14;
  optional GetWeights get_weights = 15;
  optional GetMaster get_master = 16;
  optional GetMaintenanceStatus get_maintenance_status = 17;
  optional GetMaintenanceSchedule get_maintenance_schedule = 18;
  optional GetQuota get_quota = 19;
}
/**
* Streaming response to `Call::SUBSCRIBE` made to the master.
*/
message Event {
  // Identifies which event is being delivered on the stream.
  enum Type {
    UNKNOWN = 0;

    // See `Subscribed` below.
    SUBSCRIBED = 1;

    // See `TaskAdded` below.
    TASK_ADDED = 2;
    // See `TaskUpdated` below.
    TASK_UPDATED = 3;

    // See `AgentAdded` below.
    AGENT_ADDED = 4;
    // See `AgentRemoved` below.
    AGENT_REMOVED = 5;

    // See `FrameworkAdded` below.
    FRAMEWORK_ADDED = 6;
    // See `FrameworkUpdated` below.
    FRAMEWORK_UPDATED = 7;
    // See `FrameworkRemoved` below.
    FRAMEWORK_REMOVED = 8;

    // Periodic message sent by the master to the subscriber according to
    // 'Subscribed.heartbeat_interval_seconds'. If the subscriber does not
    // receive any events (including heartbeats) for an extended period of
    // time (e.g., 5 x heartbeat_interval_seconds), it is likely that the
    // connection is lost or there is a network partition. In that case,
    // the subscriber should close the existing subscription connection and
    // resubscribe using a backoff strategy.
    HEARTBEAT = 9;

    // TODO(vinod): Fill in more events.
  }

  // First event received when a client subscribes.
  message Subscribed {
    // Snapshot of the entire cluster state. Further updates to the
    // cluster state are sent as separate events on the stream.
    optional Response.GetState get_state = 1;

    // This value will be set if the master is sending heartbeats to
    // subscribers. See the comment above on 'HEARTBEAT' for more details.
    optional double heartbeat_interval_seconds = 2;
  }

  // Forwarded by the master when a task becomes known to it. This can happen
  // when a new task is launched by the scheduler or when the task becomes
  // known to the master upon an agent (re-)registration after a failover.
  message TaskAdded {
    required Task task = 1;
  }

  // Forwarded by the master when an existing task transitions to a new state.
  message TaskUpdated {
    required FrameworkID framework_id = 1;

    // This is the status of the task corresponding to the last
    // status update acknowledged by the scheduler.
    required TaskStatus status = 2;

    // This is the latest state of the task according to the agent,
    // which can be more recent than `status` above but intermediate
    // state changes may be skipped if they happen faster than the
    // scheduler can acknowledge them.
    required TaskState state = 3;
  }

  // Forwarded by the master when a framework becomes known to it.
  // This can happen when a new framework registers with the master.
  message FrameworkAdded {
    required Response.GetFrameworks.Framework framework = 1;
  }

  // Forwarded by the master when a framework reregisters with the master
  // upon a disconnection (network error) or upon a master failover.
  message FrameworkUpdated {
    required Response.GetFrameworks.Framework framework = 1;
  }

  // Forwarded by the master when a framework is removed. This can happen when
  // a framework is explicitly torn down by the operator or if it fails to
  // reregister with the master within the failover timeout.
  message FrameworkRemoved {
    required FrameworkInfo framework_info = 1;
  }

  // Forwarded by the master when an agent becomes known to it.
  // This can happen when an agent registered for the first
  // time, or reregistered with the master.
  message AgentAdded {
    required Response.GetAgents.Agent agent = 1;
  }

  // Forwarded by the master when an agent is removed. This
  // can happen when the agent is scheduled for maintenance.
  //
  // NOTE: It's possible that an agent might become
  // active once it has been removed, i.e. if the master
  // has gc'ed its list of known "dead" agents.
  // See MESOS-5965 for context.
  message AgentRemoved {
    required SlaveID agent_id = 1;
  }

  // The kind of event; selects which of the payload fields below is
  // relevant. `HEARTBEAT` has no payload field.
  optional Type type = 1;

  // Per-event payloads, one field per `Event::Foo` message defined above.
  optional Subscribed subscribed = 2;
  optional TaskAdded task_added = 3;
  optional TaskUpdated task_updated = 4;
  optional AgentAdded agent_added = 5;
  optional AgentRemoved agent_removed = 6;
  optional FrameworkAdded framework_added = 7;
  optional FrameworkUpdated framework_updated = 8;
  optional FrameworkRemoved framework_removed = 9;
}