| // Licensed to the Apache Software Foundation (ASF) under one |
| // or more contributor license agreements. See the NOTICE file |
| // distributed with this work for additional information |
| // regarding copyright ownership. The ASF licenses this file |
| // to you under the Apache License, Version 2.0 (the |
| // "License"); you may not use this file except in compliance |
| // with the License. You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| |
| syntax = "proto2"; |
| |
| package mesos.v1; |
| |
| option java_package = "org.apache.mesos.v1"; |
| option java_outer_classname = "Protos"; |
| |
| |
| /** |
| * Status is used to indicate the state of the scheduler and executor |
| * driver after function calls. |
| * |
| * NOTE: Numbering starts at 1 and no zero value is declared. Under |
| * proto2 rules, an unset optional field of this type reads as the |
| * first value listed (DRIVER_NOT_STARTED). |
| */ |
| enum Status { |
| DRIVER_NOT_STARTED = 1; |
| DRIVER_RUNNING = 2; |
| DRIVER_ABORTED = 3; |
| DRIVER_STOPPED = 4; |
| } |
| |
| |
| /** |
| * A unique ID assigned to a framework. A framework can reuse this ID |
| * in order to do failover (see MesosSchedulerDriver). |
| * |
| * The ID is carried as a string in the required `value` field. |
| */ |
| message FrameworkID { |
| required string value = 1; |
| } |
| |
| |
| /** |
| * A unique ID assigned to an offer. |
| * |
| * The ID is carried as a string in the required `value` field. |
| */ |
| message OfferID { |
| required string value = 1; |
| } |
| |
| |
| /** |
| * A unique ID assigned to an agent. Currently, an agent gets a new ID |
| * whenever it (re)registers with Mesos. Framework writers shouldn't |
| * assume any binding between an agent ID and a hostname. |
| * |
| * The ID is carried as a string in the required `value` field. |
| */ |
| message AgentID { |
| required string value = 1; |
| } |
| |
| |
| /** |
| * A framework-generated ID to distinguish a task. The ID must remain |
| * unique while the task is active. A framework can reuse an ID _only_ |
| * if the previous task with the same ID has reached a terminal state |
| * (e.g., TASK_FINISHED, TASK_KILLED, etc.). However, reusing task IDs |
| * is strongly discouraged (MESOS-2198). |
| * |
| * The ID is carried as a string in the required `value` field. |
| */ |
| message TaskID { |
| required string value = 1; |
| } |
| |
| |
| /** |
| * A framework-generated ID to distinguish an executor. Only one |
| * executor with the same ID can be active on the same agent at a |
| * time. However, reusing executor IDs is discouraged. |
| * |
| * The ID is carried as a string in the required `value` field. |
| */ |
| message ExecutorID { |
| required string value = 1; |
| } |
| |
| |
| /** |
| * ID used to uniquely identify a container. If the `parent` is not |
| * specified, the ID is a UUID generated by the agent to uniquely |
| * identify the container of an executor run. If the `parent` field is |
| * specified, it represents a nested container. |
| * |
| * `value` is required. `parent` is optional and has the same type as |
| * this message, so the ID of a nested container can carry a chain of |
| * ancestor IDs. |
| */ |
| message ContainerID { |
| required string value = 1; |
| optional ContainerID parent = 2; |
| } |
| |
| |
| /** |
| * A unique ID assigned to a resource provider. Currently, a resource |
| * provider gets a new ID whenever it (re)registers with Mesos. |
| * |
| * The ID is carried as a string in the required `value` field. |
| */ |
| message ResourceProviderID { |
| required string value = 1; |
| } |
| |
| |
| /** |
| * A framework-generated ID to distinguish an operation. The ID |
| * must be unique within the framework. |
| * |
| * The ID is carried as a string in the required `value` field. |
| */ |
| message OperationID { |
| required string value = 1; |
| } |
| |
| |
| /** |
| * Represents time since the epoch, in nanoseconds. |
| * |
| * The count is stored as a signed 64-bit integer in the required |
| * `nanoseconds` field. |
| * |
| * NOTE(review): Which epoch is meant is not stated here; presumably |
| * the Unix epoch. Confirm before relying on it. |
| */ |
| message TimeInfo { |
| required int64 nanoseconds = 1; |
| } |
| |
| |
| /** |
| * Represents duration in nanoseconds. |
| * |
| * The count is stored as a signed 64-bit integer in the required |
| * `nanoseconds` field, so the schema itself does not rule out |
| * negative durations. |
| */ |
| message DurationInfo { |
| required int64 nanoseconds = 1; |
| } |
| |
| |
| /** |
| * A network address. |
| * |
| * `port` is the only required field. It is declared as a signed int32, |
| * so the schema itself does not restrict it to the valid port range. |
| * |
| * TODO(bmahler): Use this more widely. |
| */ |
| message Address { |
| // May contain a hostname, IP address, or both. |
| optional string hostname = 1; |
| optional string ip = 2; |
| |
| required int32 port = 3; |
| } |
| |
| |
| /** |
| * Represents a URL. |
| * |
| * `scheme` and `address` (host and port, see `Address`) are required; |
| * `path`, `query` and `fragment` may be omitted. `query` is a list of |
| * `Parameter` messages, a type declared outside this part of the file. |
| */ |
| message URL { |
| required string scheme = 1; |
| required Address address = 2; |
| optional string path = 3; |
| repeated Parameter query = 4; |
| optional string fragment = 5; |
| } |
| |
| |
| /** |
| * Represents an interval, from a given start time over a given duration. |
| * This interval pertains to an unavailability event, such as maintenance, |
| * and is not a generic interval. |
| * |
| * `start` is a point in time (`TimeInfo`, nanoseconds since the epoch) |
| * and `duration` is a length of time (`DurationInfo`, nanoseconds). |
| */ |
| message Unavailability { |
| required TimeInfo start = 1; |
| |
| // When added to `start`, this represents the end of the interval. |
| // If unspecified, the duration is assumed to be infinite. |
| optional DurationInfo duration = 2; |
| |
| // TODO(josephw): Add additional fields for expressing the purpose and |
| // urgency of the unavailability event. |
| } |
| |
| |
| /** |
| * Represents a single machine, which may hold one or more agents. |
| * |
| * NOTE: In order to match an agent to a machine, both the `hostname` and |
| * `ip` must match the values advertised by the agent to the master. |
| * Hostname is not case-sensitive. |
| * |
| * Both fields are declared optional, so the schema itself accepts a |
| * MachineID with only one of them (or neither) set. |
| */ |
| message MachineID { |
| optional string hostname = 1; |
| optional string ip = 2; |
| } |
| |
| |
| /** |
| * Holds information about a single machine, its `mode`, and any other |
| * relevant information which may affect the behavior of the machine. |
| * |
| * `id` is required; `mode` and `unavailability` are optional. |
| * |
| * NOTE: `Mode` declares no zero value. Under proto2 rules an unset |
| * `mode` reads as the first value listed (UP), so check field presence |
| * to tell "unset" apart from an explicit UP. |
| */ |
| message MachineInfo { |
| // Describes the several states that a machine can be in. A `Mode` |
| // applies to a machine and to all associated agents on the machine. |
| enum Mode { |
| // In this mode, a machine is behaving normally; |
| // offering resources, executing tasks, etc. |
| UP = 1; |
| |
| // In this mode, all agents on the machine are expected to cooperate with |
| // frameworks to drain resources. In general, draining is done ahead of |
| // a pending `unavailability`. The resources should be drained so as to |
| // maximize utilization prior to the maintenance but without knowingly |
| // violating the frameworks' requirements. |
| DRAINING = 2; |
| |
| // In this mode, a machine is not running any tasks and will not offer |
| // any of its resources. Agents on the machine will not be allowed to |
| // register with the master. |
| DOWN = 3; |
| } |
| |
| required MachineID id = 1; |
| optional Mode mode = 2; |
| |
| // Signifies that the machine may be unavailable during the given interval. |
| // See comments in `Unavailability` and for the `unavailability` fields |
| // in `Offer` and `InverseOffer` for more information. |
| optional Unavailability unavailability = 3; |
| } |
| |
| |
| /** |
| * Describes a framework. |
| * |
| * Only `user` and `name` are required. `Labels` and `OfferFilters` are |
| * declared outside this part of the file. |
| * |
| * NOTE: Field numbers do not follow declaration order (e.g. `roles` = 12 |
| * is declared next to the deprecated `role` = 6). Field numbers identify |
| * fields on the wire, so they must not be renumbered or reused. |
| */ |
| message FrameworkInfo { |
| // Used to determine the Unix user that an executor or task should be |
| // launched as. |
| // |
| // When using the MesosSchedulerDriver, if the field is set to an |
| // empty string, it will automagically set it to the current user. |
| // |
| // When using the HTTP Scheduler API, the user has to be set |
| // explicitly. |
| required string user = 1; |
| |
| // Name of the framework that shows up in the Mesos Web UI. |
| required string name = 2; |
| |
| // Used to uniquely identify the framework. |
| // |
| // This field must be unset when the framework subscribes for the |
| // first time upon which the master will assign a new ID. To |
| // resubscribe after scheduler failover the framework should set |
| // 'id' to the ID assigned by the master. Setting 'id' to values |
| // not assigned by Mesos masters is unsupported. |
| optional FrameworkID id = 3; |
| |
| // The amount of time (in seconds) that the master will wait for the |
| // scheduler to failover before it tears down the framework by |
| // killing all its tasks/executors. This should be non-zero if a |
| // framework expects to reconnect after a failure and not lose its |
| // tasks/executors. |
| // |
| // NOTE: To avoid accidental destruction of tasks, production |
| // frameworks typically set this to a large value (e.g., 1 week). |
| optional double failover_timeout = 4 [default = 0.0]; |
| |
| // If set, agents running tasks started by this framework will write |
| // the framework pid, executor pids and status updates to disk. If |
| // the agent exits (e.g., due to a crash or as part of upgrading |
| // Mesos), this checkpointed data allows the restarted agent to |
| // reconnect to executors that were started by the old instance of |
| // the agent. Enabling checkpointing improves fault tolerance, at |
| // the cost of a (usually small) increase in disk I/O. |
| optional bool checkpoint = 5 [default = false]; |
| |
| // Roles are the entities to which allocations are made. |
| // The framework must have at least one role in order to |
| // be offered resources. Note that `role` is deprecated |
| // in favor of `roles` and only one of these fields must |
| // be used. Since we cannot distinguish between empty |
| // `roles` and the default unset `role`, we require that |
| // frameworks set the `MULTI_ROLE` capability if |
| // setting the `roles` field. |
| optional string role = 6 [default = "*", deprecated=true]; |
| repeated string roles = 12; |
| |
| // Used to indicate the current host from which the scheduler is |
| // registered in the Mesos Web UI. If set to an empty string Mesos |
| // will automagically set it to the current hostname if one is |
| // available. |
| optional string hostname = 7; |
| |
| // This field should match the credential's principal the framework |
| // uses for authentication. This field is used for framework API |
| // rate limiting and dynamic reservations. It should be set even |
| // if authentication is not enabled if these features are desired. |
| optional string principal = 8; |
| |
| // This field allows a framework to advertise its web UI, so that |
| // the Mesos web UI can link to it. It is expected to be a full URL, |
| // for example http://my-scheduler.example.com:8080/. |
| optional string webui_url = 9; |
| |
| message Capability { |
| enum Type { |
| // This must be the first enum value in this list, to |
| // ensure that if 'type' is not set, the default value |
| // is UNKNOWN. This enables enum values to be added |
| // in a backwards-compatible way. See: MESOS-4997. |
| UNKNOWN = 0; |
| |
| // Receive offers with revocable resources. See 'Resource' |
| // message for details. |
| REVOCABLE_RESOURCES = 1; |
| |
| // Receive the TASK_KILLING TaskState when a task is being |
| // killed by an executor. The executor will examine this |
| // capability to determine whether it can send TASK_KILLING. |
| TASK_KILLING_STATE = 2; |
| |
| // Indicates whether the framework is aware of GPU resources. |
| // Frameworks that are aware of GPU resources are expected to |
| // avoid placing non-GPU workloads on GPU agents, in order |
| // to avoid occupying a GPU agent and preventing GPU workloads |
| // from running! Currently, if a framework is unaware of GPU |
| // resources, it will not be offered *any* of the resources on |
| // an agent with GPUs. This restriction is in place because we |
| // do not have a revocation mechanism that ensures GPU workloads |
| // can evict GPU agent occupants if necessary. |
| // |
| // TODO(bmahler): As we add revocation we can relax the |
| // restriction here. See MESOS-5634 for more information. |
| GPU_RESOURCES = 3; |
| |
| // Receive offers with resources that are shared. |
| SHARED_RESOURCES = 4; |
| |
| // Indicates that (1) the framework is prepared to handle the |
| // following TaskStates: TASK_UNREACHABLE, TASK_DROPPED, |
| // TASK_GONE, TASK_GONE_BY_OPERATOR, and TASK_UNKNOWN, and (2) |
| // the framework will assume responsibility for managing |
| // partitioned tasks that reregister with the master. |
| // |
| // Frameworks that enable this capability can define how they |
| // would like to handle partitioned tasks. Frameworks will |
| // receive TASK_UNREACHABLE for tasks on agents that are |
| // partitioned from the master. |
| // |
| // Without this capability, frameworks will receive TASK_LOST |
| // for tasks on partitioned agents. |
| // NOTE: Prior to Mesos 1.5, such tasks will be killed by Mesos |
| // when the agent reregisters (unless the master has failed over). |
| // However due to the lack of benefit in maintaining different |
| // behaviors depending on whether the master has failed over |
| // (see MESOS-7215), as of 1.5, Mesos will not kill these |
| // tasks in either case. |
| PARTITION_AWARE = 5; |
| |
| // This expresses the ability for the framework to be |
| // "multi-tenant" via using the newly introduced `roles` |
| // field, and examining `Offer.allocation_info` to determine |
| // which role the offers are being made to. We also |
| // expect that "single-tenant" schedulers eventually |
| // provide this and move away from the deprecated |
| // `role` field. |
| MULTI_ROLE = 6; |
| |
| // This capability has two effects for a framework. |
| // |
| // (1) The framework is offered resources in a new format. |
| // |
| // The offered resources have the `Resource.reservations` field set |
| // rather than `Resource.role` and `Resource.reservation`. In short, |
| // an empty `reservations` field denotes unreserved resources, and |
| // each `ReservationInfo` in the `reservations` field denotes a |
| // reservation that refines the previous one. |
| // |
| // See the 'Resource Format' section for more details. |
| // |
| // (2) The framework can create refined reservations. |
| // |
| // A framework can refine an existing reservation via the |
| // `Resource.reservations` field. For example, a reservation for role |
| // `eng` can be refined to `eng/front_end`. |
| // |
| // See `ReservationInfo.reservations` for more details. |
| // |
| // NOTE: Without this capability, a framework is not offered resources |
| // that have refined reservations. A resource is said to have refined |
| // reservations if it uses the `Resource.reservations` field, and |
| // `Resource.reservations_size() > 1`. |
| RESERVATION_REFINEMENT = 7; // EXPERIMENTAL. |
| |
| // Indicates that the framework is prepared to receive offers |
| // for agents whose region is different from the master's |
| // region. Network links between hosts in different regions |
| // typically have higher latency and lower bandwidth than |
| // network links within a region, so frameworks should be |
| // careful to only place suitable workloads in remote regions. |
| // Frameworks that are not region-aware will never receive |
| // offers for remote agents; region-aware frameworks are assumed |
| // to implement their own logic to decide which workloads (if |
| // any) are suitable for placement on remote agents. |
| REGION_AWARE = 8; |
| } |
| |
| // Enum fields should be optional, see: MESOS-4997. |
| optional Type type = 1; |
| } |
| |
| // This field allows a framework to advertise its set of |
| // capabilities (e.g., ability to receive offers for revocable |
| // resources). |
| repeated Capability capabilities = 10; |
| |
| // Labels are free-form key value pairs supplied by the framework |
| // scheduler (e.g., to describe additional functionality offered by |
| // the framework). These labels are not interpreted by Mesos itself. |
| // Labels should not contain duplicate key-value pairs. |
| optional Labels labels = 11; |
| |
| // Specific resource requirements for each of the framework's roles. This field |
| // is used by e.g., the default allocator to decide whether a framework is |
| // interested in seeing a resource of a certain shape. |
| map<string, OfferFilters> offer_filters = 13; |
| } |
| |
| |
| /** |
| * Describes a general non-interpreting non-killing check for a task or |
| * executor (or any arbitrary process/command). A type is picked by |
| * specifying one of the optional fields. Specifying more than one type |
| * is an error. |
| * |
| * NOTE: This API is subject to change and the related feature is experimental. |
| * |
| * The timing fields default to a 15 second delay, a 10 second interval |
| * and a 20 second timeout. `CommandInfo` is declared later in this file. |
| * |
| * NOTE: Field numbers do not follow declaration order (`tcp` is 7 and is |
| * declared before fields 4-6). Field numbers identify fields on the wire, |
| * so they must not be renumbered or reused. |
| */ |
| message CheckInfo { |
| enum Type { |
| UNKNOWN = 0; |
| COMMAND = 1; |
| HTTP = 2; |
| TCP = 3; |
| |
| // TODO(alexr): Consider supporting custom user checks. They should |
| // probably be paired with a `data` field and complemented by a |
| // `data` response in `CheckStatusInfo`. |
| } |
| |
| // Describes a command check. If applicable, enters mount and/or network |
| // namespaces of the task. |
| message Command { |
| required CommandInfo command = 1; |
| } |
| |
| // Describes an HTTP check. Sends a GET request to |
| // http://<host>:port/path. Note that <host> is not configurable and is |
| // resolved automatically to 127.0.0.1. |
| message Http { |
| // Port to send the HTTP request. |
| required uint32 port = 1; |
| |
| // HTTP request path. |
| optional string path = 2; |
| |
| // TODO(alexr): Add support for HTTP method. While adding POST |
| // and PUT is simple, supporting payload is more involved. |
| |
| // TODO(alexr): Add support for custom HTTP headers. |
| |
| // TODO(alexr): Consider adding an optional message to describe TLS |
| // options and thus enabling https. Such message might contain certificate |
| // validation, TLS version. |
| } |
| |
| // Describes a TCP check, i.e. based on establishing a TCP connection to |
| // the specified port. Note that <host> is not configurable and is resolved |
| // automatically to 127.0.0.1. |
| message Tcp { |
| required uint32 port = 1; |
| } |
| |
| // The type of the check. |
| optional Type type = 1; |
| |
| // Command check. |
| optional Command command = 2; |
| |
| // HTTP check. |
| optional Http http = 3; |
| |
| // TCP check. |
| optional Tcp tcp = 7; |
| |
| // Amount of time to wait to start checking the task after it |
| // transitions to `TASK_RUNNING` or `TASK_STARTING` if the latter |
| // is used by the executor. |
| optional double delay_seconds = 4 [default = 15.0]; |
| |
| // Interval between check attempts, i.e., amount of time to wait after |
| // the previous check finished or timed out to start the next check. |
| optional double interval_seconds = 5 [default = 10.0]; |
| |
| // Amount of time to wait for the check to complete. Zero means infinite |
| // timeout. |
| // |
| // After this timeout, the check attempt is aborted and no result is |
| // reported. Note that this may be considered a state change and hence |
| // may trigger a check status change delivery to the corresponding |
| // scheduler. See `CheckStatusInfo` for more details. |
| optional double timeout_seconds = 6 [default = 20.0]; |
| } |
| |
| |
| /** |
| * Describes a health check for a task or executor (or any arbitrary |
| * process/command). A type is picked by specifying one of the |
| * optional fields. Specifying more than one type is an error. |
| * |
| * NOTE: With the defaults declared below, `grace_period_seconds` (10.0) |
| * is smaller than `delay_seconds` (15.0); per the comment on |
| * `grace_period_seconds`, such a grace period has no effect. |
| * |
| * NOTE: Field numbers do not follow declaration order (e.g. `http` is 1 |
| * but is declared near the end). Field numbers identify fields on the |
| * wire, so they must not be renumbered or reused. |
| */ |
| message HealthCheck { |
| enum Type { |
| UNKNOWN = 0; |
| COMMAND = 1; |
| HTTP = 2; |
| TCP = 3; |
| } |
| |
| // Describes an HTTP health check. Sends a GET request to |
| // scheme://<host>:port/path. Note that <host> is not configurable and is |
| // resolved automatically, in most cases to 127.0.0.1. Default executors |
| // treat return codes between 200 and 399 as success; custom executors |
| // may employ a different strategy, e.g. leveraging the `statuses` field. |
| message HTTPCheckInfo { |
| optional NetworkInfo.Protocol protocol = 5 [default = IPv4]; |
| |
| // Currently "http" and "https" are supported. |
| optional string scheme = 3; |
| |
| // Port to send the HTTP request. |
| required uint32 port = 1; |
| |
| // HTTP request path. |
| optional string path = 2; |
| |
| // TODO(alexr): Add support for HTTP method. While adding POST |
| // and PUT is simple, supporting payload is more involved. |
| |
| // TODO(alexr): Add support for custom HTTP headers. |
| |
| // TODO(alexr): Add support for success and possibly failure |
| // statuses. |
| |
| // NOTE: It is up to the custom executor to interpret and act on this |
| // field. Setting this field has no effect on the default executors. |
| // |
| // TODO(haosdent): Deprecate this field when we add better support for |
| // success and possibly failure statuses, e.g. ranges of success and |
| // failure statuses. |
| repeated uint32 statuses = 4; |
| |
| // TODO(haosdent): Consider adding a flag to enable task's certificate |
| // validation for HTTPS health checks, see MESOS-5997. |
| |
| // TODO(benh): Include an 'optional bytes data' field for checking |
| // for specific data in the response. |
| } |
| |
| // Describes a TCP health check, i.e. based on establishing |
| // a TCP connection to the specified port. |
| message TCPCheckInfo { |
| optional NetworkInfo.Protocol protocol = 2 [default = IPv4]; |
| |
| // Port expected to be open. |
| required uint32 port = 1; |
| } |
| |
| // TODO(benh): Consider adding a URL health check strategy which |
| // allows doing something similar to the HTTP strategy but |
| // encapsulates all the details in a single string field. |
| |
| // Amount of time to wait to start health checking the task after it |
| // transitions to `TASK_RUNNING` or `TASK_STARTING` if the latter is |
| // used by the executor. |
| optional double delay_seconds = 2 [default = 15.0]; |
| |
| // Interval between health checks, i.e., amount of time to wait after |
| // the previous health check finished or timed out to start the next |
| // health check. |
| optional double interval_seconds = 3 [default = 10.0]; |
| |
| // Amount of time to wait for the health check to complete. After this |
| // timeout, the health check is aborted and treated as a failure. Zero |
| // means infinite timeout. |
| optional double timeout_seconds = 4 [default = 20.0]; |
| |
| // Number of consecutive failures until the task is killed by the executor. |
| optional uint32 consecutive_failures = 5 [default = 3]; |
| |
| // Amount of time after the task is launched during which health check |
| // failures are ignored. Once a check succeeds for the first time, |
| // the grace period does not apply anymore. Note that it includes |
| // `delay_seconds`, i.e., setting `grace_period_seconds` < `delay_seconds` |
| // has no effect. |
| optional double grace_period_seconds = 6 [default = 10.0]; |
| |
| // TODO(alexr): Add an optional `KillPolicy` that should be used |
| // if the task is killed because of a health check failure. |
| |
| // The type of health check. |
| optional Type type = 8; |
| |
| // Command health check. |
| optional CommandInfo command = 7; |
| |
| // HTTP health check. |
| optional HTTPCheckInfo http = 1; |
| |
| // TCP health check. |
| optional TCPCheckInfo tcp = 9; |
| } |
| |
| |
| /** |
| * Describes a kill policy for a task. Currently does not express |
| * different policies (e.g. hitting HTTP endpoints), only controls |
| * how long to wait between graceful and forcible task kill: |
| * |
| * graceful kill --------------> forcible kill |
| * grace_period |
| * |
| * Kill policies are best-effort, because machine failures / forcible |
| * terminations may occur. |
| * |
| * NOTE: For executor-less command-based tasks, the kill is performed |
| * via sending a signal to the task process: SIGTERM for the graceful |
| * kill and SIGKILL for the forcible kill. For the docker executor-less |
| * tasks the grace period is passed to 'docker stop --time'. |
| * |
| * `grace_period` is a `DurationInfo`, i.e. it is expressed in |
| * nanoseconds. The field is optional. |
| * |
| * NOTE(review): What applies when `grace_period` is unset is not stated |
| * in this part of the file. Confirm against the executor code. |
| */ |
| message KillPolicy { |
| // The grace period specifies how long to wait before forcibly |
| // killing the task. It is recommended to attempt to gracefully |
| // kill the task (and send TASK_KILLING) to indicate that the |
| // graceful kill is in progress. Once the grace period elapses, |
| // if the task has not terminated, a forcible kill should occur. |
| // The task should not assume that it will always be allotted |
| // the full grace period. For example, the executor may be |
| // shutdown more quickly by the agent, or failures / forcible |
| // terminations may occur. |
| optional DurationInfo grace_period = 1; |
| } |
| |
| |
| /** |
| * Describes a command, executed via: '/bin/sh -c value'. Any URIs specified |
| * are fetched before executing the command. If the `executable` field for a |
| * URI is set, executable file permission is set on the downloaded file. |
| * Otherwise, if the downloaded file has a recognized archive extension |
| * (currently [compressed] tar and zip) it is extracted into the executor's |
| * working directory. This extraction can be disabled by setting `extract` to |
| * false. In addition, any environment variables are set before executing |
| * the command (so they can be used to "parameterize" your command). |
| * |
| * `Environment` is declared outside this part of the file. |
| * |
| * NOTE: Field numbers do not follow declaration order (`shell` = 6, |
| * `value` = 3, `arguments` = 7, `user` = 5), and number 4 is not used by |
| * any field of this message shown here. |
| * |
| * NOTE(review): Number 4 presumably belonged to a removed field. Confirm |
| * before ever assigning it. |
| */ |
| message CommandInfo { |
| message URI { |
| required string value = 1; |
| optional bool executable = 2; |
| |
| // In case the fetched file is recognized as an archive, extract |
| // its contents into the sandbox. Note that a cached archive is |
| // not copied from the cache to the sandbox in case extraction |
| // originates from an archive in the cache. |
| optional bool extract = 3 [default = true]; |
| |
| // If this field is "true", the fetcher cache will be used. If not, |
| // fetching bypasses the cache and downloads directly into the |
| // sandbox directory, no matter whether a suitable cache file is |
| // available or not. The former directs the fetcher to download to |
| // the file cache, then copy from there to the sandbox. Subsequent |
| // fetch attempts with the same URI will omit downloading and copy |
| // from the cache as long as the file is resident there. Cache files |
| // may get evicted at any time, which then leads to renewed |
| // downloading. See also "docs/fetcher.md" and |
| // "docs/fetcher-cache-internals.md". |
| optional bool cache = 4; |
| |
| // The fetcher's default behavior is to use the URI string's basename to |
| // name the local copy. If this field is provided, the local copy will be |
| // named with its value instead. If there is a directory component (which |
| // must be a relative path), the local copy will be stored in that |
| // subdirectory inside the sandbox. |
| optional string output_file = 5; |
| } |
| |
| repeated URI uris = 1; |
| |
| optional Environment environment = 2; |
| |
| // There are two ways to specify the command: |
| // 1) If 'shell == true', the command will be launched via shell |
| // (i.e., /bin/sh -c 'value'). The 'value' specified will be |
| // treated as the shell command. The 'arguments' will be ignored. |
| // 2) If 'shell == false', the command will be launched by passing |
| // arguments to an executable. The 'value' specified will be |
| // treated as the filename of the executable. The 'arguments' |
| // will be treated as the arguments to the executable. This is |
| // similar to how POSIX exec families launch processes (i.e., |
| // execlp(value, arguments(0), arguments(1), ...)). |
| // NOTE: The field 'value' is changed from 'required' to 'optional' |
| // in 0.20.0. It will only cause issues if a new framework is |
| // connecting to an old master. |
| optional bool shell = 6 [default = true]; |
| optional string value = 3; |
| repeated string arguments = 7; |
| |
| // Enables executor and tasks to run as a specific user. If the user |
| // field is present both in FrameworkInfo and here, the CommandInfo |
| // user value takes precedence. |
| optional string user = 5; |
| } |
| |
| |
| /** |
| * Describes information about an executor. |
| * |
| * Only `executor_id` is required. `ContainerInfo`, `Resource`, |
| * `DiscoveryInfo` and `Labels` are declared outside this part of the file. |
| * |
| * NOTE: Field numbers do not follow declaration order, and numbers 2, 3 |
| * and 6 are not used by any field shown here. |
| * |
| * NOTE(review): The unused numbers presumably belonged to removed |
| * fields. Confirm before ever assigning them. |
| */ |
| message ExecutorInfo { |
| enum Type { |
| UNKNOWN = 0; |
| |
| // Mesos provides a simple built-in default executor that frameworks can |
| // leverage to run shell commands and containers. |
| // |
| // NOTES: |
| // |
| // 1) `command` must not be set when using a default executor. |
| // |
| // 2) Default executor only accepts a *single* `LAUNCH` or `LAUNCH_GROUP` |
| // operation. |
| // |
| // 3) If `container` is set, `container.type` must be `MESOS` |
| // and `container.mesos.image` must not be set. |
| DEFAULT = 1; |
| |
| // For frameworks that need custom functionality to run tasks, a `CUSTOM` |
| // executor can be used. Note that `command` must be set when using a |
| // `CUSTOM` executor. |
| CUSTOM = 2; |
| } |
| |
| // For backwards compatibility, if this field is not set when using `LAUNCH` |
| // operation, Mesos will infer the type by checking if `command` is set |
| // (`CUSTOM`) or unset (`DEFAULT`). `type` must be set when using |
| // `LAUNCH_GROUP` operation. |
| // |
| // TODO(vinod): Add support for explicitly setting `type` to `DEFAULT` in |
| // `LAUNCH` operation. |
| optional Type type = 15; |
| |
| required ExecutorID executor_id = 1; |
| optional FrameworkID framework_id = 8; // TODO(benh): Make this required. |
| optional CommandInfo command = 7; |
| |
| // Executor provided with a container will launch the container |
| // with the executor's CommandInfo and we expect the container to |
| // act as a Mesos executor. |
| optional ContainerInfo container = 11; |
| |
| repeated Resource resources = 5; |
| optional string name = 9; |
| |
| // 'source' is an identifier style string used by frameworks to |
| // track the source of an executor. This is useful when it's |
| // possible for different executor ids to be related semantically. |
| // |
| // NOTE: 'source' is exposed alongside the resource usage of the |
| // executor via JSON on the agent. This allows users to import usage |
| // information into a time series database for monitoring. |
| // |
| // This field is deprecated since 1.0. Please use labels for |
| // free-form metadata instead. |
| optional string source = 10 [deprecated = true]; // Since 1.0. |
| |
| // This field can be used to pass arbitrary bytes to an executor. |
| optional bytes data = 4; |
| |
| // Service discovery information for the executor. It is not |
| // interpreted or acted upon by Mesos. It is up to a service |
| // discovery system to use this information as needed and to handle |
| // executors without service discovery information. |
| optional DiscoveryInfo discovery = 12; |
| |
| // When shutting down an executor the agent will wait in a |
| // best-effort manner for the grace period specified here |
| // before forcibly destroying the container. The executor |
| // must not assume that it will always be allotted the full |
| // grace period, as the agent may decide to allot a shorter |
| // period and failures / forcible terminations may occur. |
| optional DurationInfo shutdown_grace_period = 13; |
| |
| // Labels are free-form key value pairs which are exposed through |
| // master and agent endpoints. Labels will not be interpreted or |
| // acted upon by Mesos itself. As opposed to the data field, labels |
| // will be kept in memory on master and agent processes. Therefore, |
| // labels should be used to tag executors with lightweight metadata. |
| // Labels should not contain duplicate key-value pairs. |
| optional Labels labels = 14; |
| } |
| |
| |
| /** |
| * Describes a domain. A domain is a collection of hosts that have |
| * similar characteristics. Mesos currently only supports "fault |
| * domains", which identify groups of hosts with similar failure |
| * characteristics. |
| * |
| * Frameworks can generally assume that network links between hosts in |
| * the same fault domain have lower latency, higher bandwidth, and better |
| * availability than network links between hosts in different domains. |
| * Schedulers may prefer to place network-intensive workloads in the |
| * same domain, as this may improve performance. Conversely, a single |
| * failure that affects a host in a domain may be more likely to |
| * affect other hosts in the same domain; hence, schedulers may prefer |
| * to place workloads that require high availability in multiple |
| * domains. (For example, all the hosts in a single rack might lose |
| * power or network connectivity simultaneously.) |
| * |
| * There are two kinds of fault domains: regions and zones. Regions |
| * offer the highest degree of fault isolation, but network latency |
| * between regions is typically high (typically >50 ms). Zones offer a |
| * modest degree of fault isolation along with reasonably low network |
| * latency (typically <10 ms). |
| * |
| * The mapping from fault domains to physical infrastructure is up to |
| * the operator to configure. In cloud environments, regions and zones |
| * can be mapped to the "region" and "availability zone" concepts |
| * exposed by most cloud providers, respectively. In on-premise |
| * deployments, regions and zones can be mapped to data centers and |
| * racks, respectively. |
| * |
| * Both masters and agents can be configured with domains. Frameworks |
| * can compare the domains of two hosts to determine if the hosts are |
| * in the same zone, in different zones in the same region, or in |
| * different regions. Note that all masters in a given Mesos cluster |
| * must be in the same region. |
| * |
| * Complex deployments may have additional levels of hierarchy: for example, |
| * multiple racks might be grouped together into "halls" and multiple DCs in |
| * the same geographical vicinity might be grouped together. As a convention, |
| * the recommended way to represent additional levels of hierarchy is via dot- |
| * separated labels in the existing zone and region fields. For example, the |
| * fact that racks "abc" and "def" are in the same hall might be represented |
| * using the zone names "rack-abc.hall-1" and "rack-def.hall-1". |
| * Software that is not aware of this additional structure will compare the |
| * zone names for equality; hence, the two zones will be treated as different |
| * (unrelated) zones. Software that is "hall-aware" can inspect the zone names |
| * and make use of the additional hierarchy. |
| * |
| * In the schema, `fault_domain` is optional. When present, a `FaultDomain` |
| * requires both a `region` and a `zone`, each identified by a required |
| * `name` string. |
| */ |
| message DomainInfo { |
| message FaultDomain { |
| message RegionInfo { |
| required string name = 1; |
| } |
| |
| message ZoneInfo { |
| required string name = 1; |
| } |
| |
| required RegionInfo region = 1; |
| required ZoneInfo zone = 2; |
| } |
| |
| optional FaultDomain fault_domain = 1; |
| } |
| |
| |
| /** |
| * Describes a master. This will probably have more fields in the |
| * future which might be used, for example, to link a framework webui |
| * to a master webui. |
| */ |
| message MasterInfo { |
| required string id = 1; |
| |
| // The IP address (only IPv4) packed as a 4-byte integer, |
| // stored in network order. Deprecated, use `address.ip` instead. |
| required uint32 ip = 2; |
| |
| // The TCP port the Master is listening on for incoming |
| // HTTP requests; deprecated, use `address.port` instead. |
| required uint32 port = 3 [default = 5050]; |
| |
| // In the default implementation, this will contain information |
| // about the IP address, port and Master name; it should really |
| // not be relied upon by external tooling/frameworks and be |
| // considered an "internal" implementation field. |
| optional string pid = 4; |
| |
| // The server's hostname, if available; it may be unreliable |
| // in environments where the DNS configuration does not resolve |
| // internal hostnames (e.g., some public cloud providers). |
| // Deprecated, use `address.hostname` instead. |
| optional string hostname = 5; |
| |
| // The running Master version, as a string; taken from the |
| // generated "master/version.hpp". |
| optional string version = 6; |
| |
| // The full IP address (supports both IPv4 and IPv6 formats) |
| // and supersedes the use of `ip`, `port` and `hostname`. |
| // Since Mesos 0.24. |
| optional Address address = 7; |
| |
| // The domain that this master belongs to. All masters in a Mesos |
| // cluster should belong to the same region. |
| optional DomainInfo domain = 8; |
| |
| message Capability { |
| enum Type { |
| UNKNOWN = 0; |
| |
| // NOTE: When the master starts to use a new capability that |
| // may prevent compatible downgrade, remember to add the |
| // capability to `Registry::MinimumCapability`. Conversely, |
| // the added minimum capability should be removed if the capability |
| // is deemed to be no longer required for compatible downgrade. |
| // See MESOS-8878 for more details. |
| |
| // The master can handle agents whose state |
| // changes after reregistering. |
| AGENT_UPDATE = 1; |
| |
| // The master can drain or deactivate agents when requested |
| // via operator APIs. |
| AGENT_DRAINING = 2; |
| } |
| optional Type type = 1; |
| } |
| |
| repeated Capability capabilities = 9; |
| } |
| |
| |
| /** |
| * Describes an agent. Note that the 'id' field is only available |
| * after an agent is registered with the master, and is made available |
| * here to facilitate re-registration. |
| */ |
| message AgentInfo { |
| required string hostname = 1; |
| optional int32 port = 8 [default = 5051]; |
| |
| // The configured resources at the agent. This does not include any |
| // dynamic reservations or persistent volumes that may currently |
| // exist at the agent. |
| repeated Resource resources = 3; |
| |
| repeated Attribute attributes = 5; |
| optional AgentID id = 6; |
| |
| // The domain that this agent belongs to. If the agent's region |
| // differs from the master's region, it will not appear in resource |
| // offers to frameworks that have not enabled the REGION_AWARE |
| // capability. |
| optional DomainInfo domain = 10; |
| |
| message Capability { |
| enum Type { |
| // This must be the first enum value in this list, to |
| // ensure that if 'type' is not set, the default value |
| // is UNKNOWN. This enables enum values to be added |
| // in a backwards-compatible way. See: MESOS-4997. |
| UNKNOWN = 0; |
| |
| // This expresses the ability for the agent to be able |
| // to launch tasks of a 'multi-role' framework. |
| MULTI_ROLE = 1; |
| |
| // This expresses the ability for the agent to be able to launch |
| // tasks, reserve resources, and create volumes using resources |
| // allocated to a 'hierarchical-role'. |
| // NOTE: This capability is required specifically for creating |
| // volumes because a hierarchical role name includes '/' (slashes). |
| // Agents with this capability know to transform the '/' (slashes) |
| // into ' ' (spaces). |
| HIERARCHICAL_ROLE = 2; |
| |
| // This capability has three effects for an agent. |
| // |
| // (1) The format of the checkpointed resources, and |
| // the resources reported to master. |
| // |
| // These resources are reported in the "pre-reservation-refinement" |
| // format if none of the resources have refined reservations. If any |
| // of the resources have refined reservations, they are reported in |
| // the "post-reservation-refinement" format. The purpose is to allow |
| // downgrading of an agent as well as communication with a pre-1.4.0 |
| // master until the reservation refinement feature is actually used. |
| // |
| // See the 'Resource Format' section for more details. |
| // |
| // (2) The format of the resources reported by the HTTP endpoints. |
| // |
| // For resources reported by agent endpoints, the |
| // "pre-reservation-refinement" format is "injected" if possible. |
| // That is, resources without refined reservations will have the |
| // `Resource.role` and `Resource.reservation` set, whereas |
| // resources with refined reservations will not. |
| // |
| // See the 'Resource Format' section for more details. |
| // |
| // (3) The ability for the agent to launch tasks, reserve resources, and |
| // create volumes using resources that have refined reservations. |
| // |
| // See the `Resource.reservations` field for more details. |
| // |
| // NOTE: A resource is said to have refined reservations if it uses the |
| // `Resource.reservations` field, and `Resource.reservations_size() > 1`. |
| RESERVATION_REFINEMENT = 3; |
| |
| // This expresses the ability for the agent to handle resource |
| // provider related operations. This includes the following: |
| // |
| // (1) The ability to report resources that are provided by some |
| // local resource providers through the resource provider API. |
| // |
| // (2) The ability to provide operation feedback. This also means |
| // that this capability is a prerequisite for full support of |
| // feedback for operations on agent default resources. If an |
| // agent has the mandatory AGENT_OPERATION_FEEDBACK capability |
| // set but not the RESOURCE_PROVIDER capability, then |
| // operations on agent default resources which request feedback |
| // will not be allowed. |
| RESOURCE_PROVIDER = 4; |
| |
| // This expresses the capability for the agent to handle persistent volume |
| // resize operations safely. This capability is turned on by default. |
| RESIZE_VOLUME = 5; |
| |
| // This expresses the ability of the agent to handle operation feedback |
| // for operations on agent default resources. |
| // |
| // Note that full support for this feature also requires the |
| // RESOURCE_PROVIDER capability; if you would like the agent to |
| // handle feedback for operations on agent default resources, the |
| // RESOURCE_PROVIDER capability should be set as well. |
| AGENT_OPERATION_FEEDBACK = 6; |
| |
| // This expresses the ability for the agent to automatically drain tasks |
| // in preparation for operator maintenance. This capability is required. |
| AGENT_DRAINING = 7; |
| |
| // This expresses the ability for the agent to launch tasks which specify |
| // resource limits for CPU and/or memory. |
| TASK_RESOURCE_LIMITS = 8; |
| } |
| |
| // Enum fields should be optional, see: MESOS-4997. |
| optional Type type = 1; |
| } |
| } |
| |
| |
| /** |
| * Describes the container configuration to run a managed CSI plugin. |
| * |
| * `services` lists the CSI services this configuration provides; see |
| * the comment on `CSIPluginInfo.containers` for how a configuration is |
| * selected to serve the controller service and the node service. |
| */ |
| message CSIPluginContainerInfo { |
| enum Service { |
| UNKNOWN = 0; |
| CONTROLLER_SERVICE = 1; |
| NODE_SERVICE = 2; |
| } |
| |
| repeated Service services = 1; |
| optional CommandInfo command = 2; |
| repeated Resource resources = 3; |
| optional ContainerInfo container = 4; |
| } |
| |
| |
| /** |
| * Describes the endpoint of an unmanaged CSI plugin service. |
| * |
| * `csi_service` names the CSI service and `endpoint` is where it can be |
| * reached; per the comment on `CSIPluginInfo.endpoints`, an endpoint is |
| * usually a path to a Unix domain socket. |
| */ |
| message CSIPluginEndpoint { |
| required CSIPluginContainerInfo.Service csi_service = 1; |
| required string endpoint = 2; |
| } |
| |
| |
| /** |
| * Describes a CSI plugin. |
| */ |
| message CSIPluginInfo { |
| // The type of the CSI plugin. This uniquely identifies a CSI |
| // implementation. For instance: |
| // org.apache.mesos.csi.test |
| // |
| // Please follow the Java package naming convention |
| // (https://en.wikipedia.org/wiki/Java_package#Package_naming_conventions) |
| // to avoid conflicts on type names. |
| required string type = 1; |
| |
| // The name of the CSI plugin. There could be multiple instances of a |
| // type of CSI plugin within a Mesos cluster. The name field is used to |
| // distinguish these instances. It should be a legal Java identifier |
| // (https://docs.oracle.com/javase/tutorial/java/nutsandbolts/variables.html) |
| // to avoid conflicts on concatenation of type and name. |
| // |
| // The type and name together provide the means to uniquely identify a storage |
| // backend and its resources in the cluster, so the operator should ensure |
| // that the concatenation of type and name is unique in the cluster, and it |
| // remains the same if the instance is migrated to another agent (e.g., there |
| // is a change in the agent ID). |
| optional string name = 2 [default = "default"]; |
| |
| // We support two kinds of CSI plugins: |
| // 1. Managed CSI plugins: This is the plugin which will be launched by |
| // Mesos as a standalone container, and Mesos will internally determine |
| // its endpoint when launching it and manage its whole lifecycle. For this |
| // kind of plugin, the `containers` field below must be specified. |
| // 2. Unmanaged CSI plugins: This is the plugin which is launched out of |
| // Mesos (e.g., manually launched by the operator). For this kind of |
| // plugin, the `endpoints` field below must be specified because Mesos |
| // needs it to call CSI gRPC methods. |
| // Please note that only one of the `containers` and `endpoints` fields should |
| // be specified. |
| |
| // A list of container configurations to run a managed CSI plugin. |
| // The controller service will be served by the first configuration |
| // that contains `CONTROLLER_SERVICE`, and the node service will be |
| // served by the first configuration that contains `NODE_SERVICE`. |
| repeated CSIPluginContainerInfo containers = 3; |
| |
| // The service endpoints of the unmanaged CSI plugin. An endpoint is usually |
| // a path to a Unix domain socket. |
| repeated CSIPluginEndpoint endpoints = 4; |
| |
| // The root directory of all the target paths managed by the CSI plugin. |
| // Each volume will be published by the CSI plugin at a sub-directory |
| // under this path. |
| optional string target_path_root = 5; |
| |
| // Some CSI plugins which implement the CSI v1 spec expect the target |
| // path to already exist, which is actually not CSI v1 spec compliant. In |
| // such cases this field should be set to `true` as a workaround for those |
| // plugins. For CSI plugins which implement the CSI v0 spec, this field is |
| // ignored. |
| optional bool target_path_exists = 6; |
| } |
| |
| |
| /** |
| * Describes a resource provider. Note that the 'id' field is only available |
| * after a resource provider is registered with the master, and is made |
| * available here to facilitate re-registration. |
| */ |
| message ResourceProviderInfo { |
| optional ResourceProviderID id = 1; |
| repeated Attribute attributes = 2; |
| |
| // The type of the resource provider. This uniquely identifies a |
| // resource provider implementation. For instance: |
| // org.apache.mesos.rp.local.storage |
| // |
| // Please follow the Java package naming convention |
| // (https://en.wikipedia.org/wiki/Java_package#Package_naming_conventions) |
| // to avoid conflicts on type names. |
| required string type = 3; |
| |
| // The name of the resource provider. There could be multiple |
| // instances of a type of resource provider. The name field is used |
| // to distinguish these instances. It should be a legal Java identifier |
| // (https://docs.oracle.com/javase/tutorial/java/nutsandbolts/variables.html) |
| // to avoid conflicts on concatenation of type and name. |
| required string name = 4; |
| |
| // The stack of default reservations. If this field is not empty, it |
| // indicates that resources from this resource provider are reserved |
| // by default, except for the resources that have been reserved or |
| // unreserved through operations. The first `ReservationInfo` |
| // may have type `STATIC` or `DYNAMIC`, but the rest must have |
| // `DYNAMIC`. One can create a new reservation on top of an existing |
| // one by pushing a new `ReservationInfo` to the back. The last |
| // `ReservationInfo` in this stack is the "current" reservation. The |
| // new reservation's role must be a child of the current one. |
| repeated Resource.ReservationInfo default_reservations = 5; // EXPERIMENTAL. |
| |
| // Storage resource provider related information. |
| message Storage { |
| required CSIPluginInfo plugin = 1; |
| |
| // Amount of time to wait after the resource provider finishes reconciling |
| // existing volumes and storage pools against the CSI plugin to start the |
| // next reconciliation. A non-positive value means that no reconciliation |
| // will happen after startup. |
| optional double reconciliation_interval_seconds = 2; |
| } |
| |
| optional Storage storage = 6; // EXPERIMENTAL. |
| } |
| |
| |
| /** |
| * Describes an Attribute or Resource "value". A value is described |
| * using the standard protocol buffer "union" trick: `type` names the |
| * kind of value, and the optional field of the matching kind |
| * (`scalar`, `ranges`, `set` or `text`) is expected to carry it. |
| */ |
| message Value { |
| enum Type { |
| SCALAR = 0; |
| RANGES = 1; |
| SET = 2; |
| TEXT = 3; |
| } |
| |
| message Scalar { |
| // Scalar values are represented using floating point. To reduce |
| // the chance of unpredictable floating point behavior due to |
| // roundoff error, Mesos only supports three decimal digits of |
| // precision for scalar resource values. That is, floating point |
| // values are converted to a fixed point format that supports |
| // three decimal digits of precision, and then converted back to |
| // floating point on output. Any additional precision in scalar |
| // resource values is discarded (via rounding). |
| required double value = 1; |
| } |
| |
| message Range { |
| required uint64 begin = 1; |
| required uint64 end = 2; |
| } |
| |
| message Ranges { |
| repeated Range range = 1; |
| } |
| |
| message Set { |
| repeated string item = 1; |
| } |
| |
| message Text { |
| required string value = 1; |
| } |
| |
| required Type type = 1; |
| optional Scalar scalar = 2; |
| optional Ranges ranges = 3; |
| optional Set set = 4; |
| optional Text text = 5; |
| } |
| |
| |
| /** |
| * Describes an attribute that can be set on a machine. For now, |
| * attributes and resources share the same "value" type, but this may |
| * change in the future and attributes may only be string based. |
| * |
| * NOTE: `set` uses field number 6 while `text` uses field number 5, so |
| * the declaration order below does not follow the field numbers. |
| */ |
| message Attribute { |
| required string name = 1; |
| required Value.Type type = 2; |
| optional Value.Scalar scalar = 3; |
| optional Value.Ranges ranges = 4; |
| optional Value.Set set = 6; |
| optional Value.Text text = 5; |
| } |
| |
| |
| /** |
| * Describes a resource from a resource provider. The `name` field is |
| * a string like "cpus" or "mem" that indicates which kind of resource |
| * this is; the rest of the fields describe the properties of the |
| * resource. A resource can take on one of three types: scalar |
| * (double), a list of finite and discrete ranges (e.g., [1-10, |
| * 20-30]), or a set of items. A resource is described using the |
| * standard protocol buffer "union" trick. |
| * |
| * Note that "disk" and "mem" resources are scalar values expressed in |
| * megabytes. Fractional "cpus" values are allowed (e.g., "0.5"), |
| * which correspond to partial shares of a CPU. |
| */ |
| message Resource { |
| // Specified if the resource comes from a particular resource provider. |
| optional ResourceProviderID provider_id = 12; |
| |
| required string name = 1; |
| required Value.Type type = 2; |
| optional Value.Scalar scalar = 3; |
| optional Value.Ranges ranges = 4; |
| optional Value.Set set = 5; |
| |
| // The role that this resource is reserved for. If "*", this indicates |
| // that the resource is unreserved. Otherwise, the resource will only |
| // be offered to frameworks that belong to this role. |
| // |
| // NOTE: Frameworks must not set this field if `reservations` is set. |
| // See the 'Resource Format' section for more details. |
| // |
| // TODO(mpark): Deprecate once `reservations` is no longer experimental. |
| optional string role = 6 [default = "*", deprecated=true]; |
| |
| // This was initially introduced to support MULTI_ROLE capable |
| // frameworks. Frameworks that are not MULTI_ROLE capable can |
| // continue to assume that the offered resources are allocated |
| // to their role. |
| message AllocationInfo { |
| // If set, this resource is allocated to a role. Note that in the |
| // future, this may be unset and the scheduler may be responsible |
| // for allocating to one of its roles. |
| optional string role = 1; |
| |
| // In the future, we may add additional fields here, e.g. priority |
| // tier, type of allocation (quota / fair share). |
| } |
| |
| optional AllocationInfo allocation_info = 11; |
| |
| // Resource Format: |
| // |
| // Frameworks receive resource offers in one of two formats, depending on |
| // whether the RESERVATION_REFINEMENT capability is enabled. |
| // |
| // __WITHOUT__ the RESERVATION_REFINEMENT capability, the framework is offered |
| // resources in the "pre-reservation-refinement" format. In this format, the |
| // `Resource.role` and `Resource.reservation` fields are used in conjunction |
| // to describe the reservation state of a `Resource` message. |
| // |
| // The following is an overview of the possible reservation states: |
| // |
| // +------------+------------------------------------------------------------+ |
| // | unreserved | { | |
| // | | role: "*", | |
| // | | reservation: <not set>, | |
| // | | reservations: <unused> | |
| // | | } | |
| // +------------+------------------------------------------------------------+ |
| // | static | { | |
| // | | role: "eng", | |
| // | | reservation: <not set>, | |
| // | | reservations: <unused> | |
| // | | } | |
| // +------------+------------------------------------------------------------+ |
| // | dynamic | { | |
| // | | role: "eng", | |
| // | | reservation: { | |
| // | | type: <unused>, | |
| // | | role: <unused>, | |
| // | | principal: <optional>, | |
| // | | labels: <optional> | |
| // | | }, | |
| // | | reservations: <unused> | |
| // | | } | |
| // +------------+------------------------------------------------------------+ |
| // |
| // __WITH__ the RESERVATION_REFINEMENT capability, the framework is offered |
| // resources in the "post-reservation-refinement" format. In this format, the |
| // reservation state of a `Resource` message is expressed solely in |
| // `Resource.reservations` field. |
| // |
| // The following is an overview of the possible reservation states: |
| // |
| // +------------+------------------------------------------------------------+ |
| // | unreserved | { | |
| // | | role: <unused>, | |
| // | | reservation: <unused>, | |
| // | | reservations: [] | |
| // | | } | |
| // +------------+------------------------------------------------------------+ |
| // | static | { | |
| // | | role: <unused>, | |
| // | | reservation: <unused>, | |
| // | | reservations: [ | |
| // | | { | |
| // | | type: STATIC, | |
| // | | role: "eng", | |
| // | | principal: <optional>, | |
| // | | labels: <optional> | |
| // | | } | |
| // | | ] | |
| // | | } | |
| // +------------+------------------------------------------------------------+ |
| // | dynamic | { | |
| // | | role: <unused>, | |
| // | | reservation: <unused>, | |
| // | | reservations: [ | |
| // | | { | |
| // | | type: DYNAMIC, | |
| // | | role: "eng", | |
| // | | principal: <optional>, | |
| // | | labels: <optional> | |
| // | | } | |
| // | | ] | |
| // | | } | |
| // +------------+------------------------------------------------------------+ |
| // |
| // We can also __refine__ reservations with this capability like so: |
| // |
| // +------------+------------------------------------------------------------+ |
| // | refined | { | |
| // | | role: <unused>, | |
| // | | reservation: <unused>, | |
| // | | reservations: [ | |
| // | | { | |
| // | | type: STATIC or DYNAMIC, | |
| // | | role: "eng", | |
| // | | principal: <optional>, | |
| // | | labels: <optional> | |
| // | | }, | |
| // | | { | |
| // | | type: DYNAMIC, | |
| // | | role: "eng/front_end", | |
| // | | principal: <optional>, | |
| // | | labels: <optional> | |
| // | | } | |
| // | | ] | |
| // | | } | |
| // +------------+------------------------------------------------------------+ |
| // |
| // NOTE: Each `ReservationInfo` in the `reservations` field denotes |
| // a reservation that refines the previous `ReservationInfo`. |
| |
| message ReservationInfo { |
| // Describes a reservation. Static reservations are set by the operator on |
| // the command line and are immutable without an agent restart. A dynamic |
| // reservation is made by an operator via the '/reserve' HTTP endpoint |
| // or by a framework via the offer cycle by sending back an |
| // 'Offer::Operation::Reserve' message. |
| // |
| // NOTE: We currently do not allow frameworks with role "*" to make dynamic |
| // reservations. |
| |
| enum Type { |
| UNKNOWN = 0; |
| STATIC = 1; |
| DYNAMIC = 2; |
| } |
| |
| // The type of this reservation. |
| // |
| // NOTE: This field must not be set for `Resource.reservation`. |
| // See the 'Resource Format' section for more details. |
| optional Type type = 4; |
| |
| // The role for which this reservation is made. |
| // |
| // NOTE: This field must not be set for `Resource.reservation`. |
| // See the 'Resource Format' section for more details. |
| optional string role = 3; |
| |
| // Indicates the principal, if any, of the framework or operator |
| // that reserved this resource. If reserved by a framework, the |
| // field should match the `FrameworkInfo.principal`. It is used in |
| // conjunction with the `UnreserveResources` ACL to determine |
| // whether the entity attempting to unreserve this resource is |
| // permitted to do so. |
| optional string principal = 1; |
| |
| // Labels are free-form key value pairs that can be used to |
| // associate arbitrary metadata with a reserved resource. For |
| // example, frameworks can use labels to identify the intended |
| // purpose for a portion of the resources the framework has |
| // reserved at a given agent. Labels should not contain duplicate |
| // key-value pairs. |
| optional Labels labels = 2; |
| } |
| |
| // If this is set, this resource was dynamically reserved by an |
| // operator or a framework. Otherwise, this resource is either unreserved |
| // or statically reserved by an operator via the --resources flag. |
| // |
| // NOTE: Frameworks must not set this field if `reservations` is set. |
| // See the 'Resource Format' section for more details. |
| // |
| // TODO(mpark): Deprecate once `reservations` is no longer experimental. |
| optional ReservationInfo reservation = 8; |
| |
| // The stack of reservations. If this field is empty, it indicates that this |
| // resource is unreserved. Otherwise, the resource is reserved. The first |
| // `ReservationInfo` may have type `STATIC` or `DYNAMIC`, but the rest must |
| // have `DYNAMIC`. One can create a new reservation on top of an existing |
| // one by pushing a new `ReservationInfo` to the back. The last |
| // `ReservationInfo` in this stack is the "current" reservation. The new |
| // reservation's role must be a child of the current reservation's role. |
| // |
| // NOTE: Frameworks must not set this field if `reservation` is set. |
| // See the 'Resource Format' section for more details. |
| // |
| // TODO(mpark): Deprecate `role` and `reservation` once this is stable. |
| repeated ReservationInfo reservations = 13; // EXPERIMENTAL. |
| |
| message DiskInfo { |
| // Describes a persistent disk volume. |
| // |
| // A persistent disk volume will not be automatically garbage |
| // collected if the task/executor/agent terminates, but will be |
| // re-offered to the framework(s) belonging to the 'role'. |
| // |
| // NOTE: Currently, we do not allow persistent disk volumes |
| // without a reservation (i.e., 'role' cannot be '*'). |
| message Persistence { |
| // A unique ID for the persistent disk volume. This ID must be |
| // unique per role on each agent. Although it is possible to use |
| // the same ID on different agents in the cluster and to reuse |
| // IDs after a volume with that ID has been destroyed, both |
| // practices are discouraged. |
| required string id = 1; |
| |
| // This field indicates the principal of the operator or |
| // framework that created this volume. It is used in conjunction |
| // with the "destroy" ACL to determine whether an entity |
| // attempting to destroy the volume is permitted to do so. |
| // |
| // NOTE: This field should match the FrameworkInfo.principal of |
| // the framework that created the volume. |
| optional string principal = 2; |
| } |
| |
| optional Persistence persistence = 1; |
| |
| // Describes how this disk resource will be mounted in the |
| // container. If not set, the disk resource will be used as the |
| // sandbox. Otherwise, it will be mounted according to the |
| // 'container_path' inside 'volume'. The 'host_path' inside |
| // 'volume' is ignored. |
| // NOTE: If 'volume' is set but 'persistence' is not set, the |
| // volume will be automatically garbage collected after |
| // task/executor terminates. Currently, if 'persistence' is set, |
| // 'volume' must be set. |
| optional Volume volume = 2; |
| |
| // Describes where a disk originates from. |
| message Source { |
| enum Type { |
| UNKNOWN = 0; |
| PATH = 1; |
| MOUNT = 2; |
| BLOCK = 3; |
| RAW = 4; |
| } |
| |
| // A folder that can be located on a separate disk device. This |
| // can be shared and carved up as necessary between frameworks. |
| message Path { |
| // Path to the folder (e.g., /mnt/raid/disk0). If the path is a |
| // relative path, it is relative to the agent work directory. |
| optional string root = 1; |
| } |
| |
| // A mounted file-system set up by the Agent administrator. This |
| // can only be used exclusively: a framework cannot accept a |
| // partial amount of this disk. |
| message Mount { |
| // Path to mount point (e.g., /mnt/raid/disk0). If the path is a |
| // relative path, it is relative to the agent work directory. |
| optional string root = 1; |
| } |
| |
| required Type type = 1; |
| optional Path path = 2; |
| optional Mount mount = 3; |
| |
| // The vendor of this source. If present, this field provides the means to |
| // uniquely identify the storage backend of this source in the cluster. |
| optional string vendor = 7; // EXPERIMENTAL. |
| |
| // The identifier of this source. This field maps onto CSI volume IDs and |
| // is not expected to be set by frameworks. If both `vendor` and `id` are |
| // present, these two fields together provide the means to uniquely |
| // identify this source in the cluster. |
| optional string id = 4; // EXPERIMENTAL. |
| |
| // Additional metadata for this source. This field maps onto CSI volume |
| // context. Frameworks should neither alter this field, nor expect this |
| // field to remain unchanged. |
| optional Labels metadata = 5; // EXPERIMENTAL. |
| |
| // This field serves as an indirection to a set of storage |
| // vendor specific disk parameters which describe the properties |
| // of the disk. The operator will set up mappings from a |
| // profile name to a set of vendor specific disk parameters. And |
| // the framework will do disk selection based on profile names, |
| // instead of vendor specific disk parameters. |
| // |
| // Also see the DiskProfileAdaptor module. |
| optional string profile = 6; // EXPERIMENTAL. |
| } |
| |
| optional Source source = 3; |
| } |
| |
| optional DiskInfo disk = 7; |
| |
| message RevocableInfo {} |
| |
| // If this is set, the resources are revocable, i.e., any tasks or |
| // executors launched using these resources could get preempted or |
| // throttled at any time. This could be used by frameworks to run |
| // best effort tasks that do not need strict uptime or performance |
| // guarantees. Note that if this is set, 'disk' or 'reservation' |
| // cannot be set. |
| optional RevocableInfo revocable = 9; |
| |
| // Allow the resource to be shared across tasks. |
| message SharedInfo {} |
| |
| // If this is set, the resources are shared, i.e. multiple tasks |
| // can be launched using this resource and all of them shall refer |
| // to the same physical resource on the cluster. Note that only |
| // persistent volumes can be shared currently. |
| // |
| // NOTE: Different shared resources must be uniquely identifiable. |
| // This currently holds as persistent volumes should have a unique `id` |
| // (this is not validated or enforced though). |
| optional SharedInfo shared = 10; |
| } |
| |
| |
| /** |
| * When the network bandwidth caps are enabled and the container |
| * is over its limit, outbound packets may be either delayed or |
| * dropped completely either because it exceeds the maximum bandwidth |
| * allocation for a single container (the cap) or because the combined |
| * network traffic of multiple containers on the host exceeds the |
| * transmit capacity of the host (the share). We can report the |
| * following statistics for each of these conditions exported directly |
| * from the Linux Traffic Control Queueing Discipline. |
| * |
| * id : name of the limiter, e.g. 'tx_bw_cap' |
| * backlog : number of packets currently delayed |
| * bytes : total bytes seen |
| * drops : number of packets dropped in total |
| * overlimits : number of packets which exceeded allocation |
| * packets : total packets seen |
| * qlen : number of packets currently queued |
| * ratebps : throughput in bytes/sec |
| * ratepps : throughput in packets/sec |
| * requeues : number of times a packet has been delayed due to |
| * locking or device contention issues |
| * |
| * More information on the operation of Linux Traffic Control can be |
| * found at http://www.lartc.org/lartc.html. |
| */ |
| message TrafficControlStatistics { |
| // Name of the limiter, e.g. 'tx_bw_cap'. This is the only required |
| // field; every counter below is optional and may be absent. |
| required string id = 1; |
| |
| // Number of packets currently delayed. |
| optional uint64 backlog = 2; |
| |
| // Total bytes seen. |
| optional uint64 bytes = 3; |
| |
| // Number of packets dropped in total. |
| optional uint64 drops = 4; |
| |
| // Number of packets which exceeded allocation. |
| optional uint64 overlimits = 5; |
| |
| // Total packets seen. |
| optional uint64 packets = 6; |
| |
| // Number of packets currently queued. |
| optional uint64 qlen = 7; |
| |
| // Throughput in bytes/sec. |
| optional uint64 ratebps = 8; |
| |
| // Throughput in packets/sec. |
| optional uint64 ratepps = 9; |
| |
| // Number of times a packet has been delayed due to locking or device |
| // contention issues. |
| optional uint64 requeues = 10; |
| } |
| |
| |
| /** |
| * IP counters, carried as the `ip_stats` member of `SNMPStatistics`. |
| * Every field is optional. |
| * |
| * NOTE(review): The CamelCase field names look like they mirror the |
| * `Ip:` counters exposed by Linux in /proc/net/snmp -- confirm before |
| * relying on that mapping. |
| */ |
| message IpStatistics { |
| optional int64 Forwarding = 1; |
| optional int64 DefaultTTL = 2; |
| optional int64 InReceives = 3; |
| optional int64 InHdrErrors = 4; |
| optional int64 InAddrErrors = 5; |
| optional int64 ForwDatagrams = 6; |
| optional int64 InUnknownProtos = 7; |
| optional int64 InDiscards = 8; |
| optional int64 InDelivers = 9; |
| optional int64 OutRequests = 10; |
| optional int64 OutDiscards = 11; |
| optional int64 OutNoRoutes = 12; |
| optional int64 ReasmTimeout = 13; |
| optional int64 ReasmReqds = 14; |
| optional int64 ReasmOKs = 15; |
| optional int64 ReasmFails = 16; |
| optional int64 FragOKs = 17; |
| optional int64 FragFails = 18; |
| optional int64 FragCreates = 19; |
| } |
| |
| |
| /** |
| * ICMP counters, carried as the `icmp_stats` member of |
| * `SNMPStatistics`. Every field is optional. |
| * |
| * Tags 1-14 hold the `In*` counters and tags 15-27 the `Out*` |
| * counters. The two groups are not fully symmetric: `InCsumErrors` |
| * has no `Out*` counterpart. |
| * |
| * NOTE(review): The field names look like they mirror the `Icmp:` |
| * counters exposed by Linux in /proc/net/snmp -- confirm. |
| */ |
| message IcmpStatistics { |
| optional int64 InMsgs = 1; |
| optional int64 InErrors = 2; |
| optional int64 InCsumErrors = 3; |
| optional int64 InDestUnreachs = 4; |
| optional int64 InTimeExcds = 5; |
| optional int64 InParmProbs = 6; |
| optional int64 InSrcQuenchs = 7; |
| optional int64 InRedirects = 8; |
| optional int64 InEchos = 9; |
| optional int64 InEchoReps = 10; |
| optional int64 InTimestamps = 11; |
| optional int64 InTimestampReps = 12; |
| optional int64 InAddrMasks = 13; |
| optional int64 InAddrMaskReps = 14; |
| optional int64 OutMsgs = 15; |
| optional int64 OutErrors = 16; |
| optional int64 OutDestUnreachs = 17; |
| optional int64 OutTimeExcds = 18; |
| optional int64 OutParmProbs = 19; |
| optional int64 OutSrcQuenchs = 20; |
| optional int64 OutRedirects = 21; |
| optional int64 OutEchos = 22; |
| optional int64 OutEchoReps = 23; |
| optional int64 OutTimestamps = 24; |
| optional int64 OutTimestampReps = 25; |
| optional int64 OutAddrMasks = 26; |
| optional int64 OutAddrMaskReps = 27; |
| } |
| |
| |
| /** |
| * TCP counters, carried as the `tcp_stats` member of `SNMPStatistics`. |
| * Every field is optional. |
| * |
| * NOTE(review): The field names look like they mirror the `Tcp:` |
| * counters exposed by Linux in /proc/net/snmp -- confirm. |
| */ |
| message TcpStatistics { |
| optional int64 RtoAlgorithm = 1; |
| optional int64 RtoMin = 2; |
| optional int64 RtoMax = 3; |
| optional int64 MaxConn = 4; |
| optional int64 ActiveOpens = 5; |
| optional int64 PassiveOpens = 6; |
| optional int64 AttemptFails = 7; |
| optional int64 EstabResets = 8; |
| optional int64 CurrEstab = 9; |
| optional int64 InSegs = 10; |
| optional int64 OutSegs = 11; |
| optional int64 RetransSegs = 12; |
| optional int64 InErrs = 13; |
| optional int64 OutRsts = 14; |
| optional int64 InCsumErrors = 15; |
| } |
| |
| |
| /** |
| * UDP counters, carried as the `udp_stats` member of `SNMPStatistics`. |
| * Every field is optional. |
| * |
| * NOTE(review): The field names look like they mirror the `Udp:` |
| * counters exposed by Linux in /proc/net/snmp -- confirm. |
| */ |
| message UdpStatistics { |
| optional int64 InDatagrams = 1; |
| optional int64 NoPorts = 2; |
| optional int64 InErrors = 3; |
| optional int64 OutDatagrams = 4; |
| optional int64 RcvbufErrors = 5; |
| optional int64 SndbufErrors = 6; |
| optional int64 InCsumErrors = 7; |
| optional int64 IgnoredMulti = 8; |
| } |
| |
| |
| /** |
| * Groups the per-protocol counter sets (IP, ICMP, TCP and UDP) into a |
| * single message. Reported per container through |
| * `ResourceStatistics.net_snmp_statistics`. Each member is optional, |
| * so any subset of the protocols may be present. |
| */ |
| message SNMPStatistics { |
| optional IpStatistics ip_stats = 1; |
| optional IcmpStatistics icmp_stats = 2; |
| optional TcpStatistics tcp_stats = 3; |
| optional UdpStatistics udp_stats = 4; |
| } |
| |
| |
| /** |
| * Limit and usage figures for one disk resource. Reported through the |
| * repeated `ResourceStatistics.disk_statistics` field, i.e. one entry |
| * per disk (resource). |
| */ |
| message DiskStatistics { |
| // NOTE(review): `source` and `persistence` presumably identify which |
| // disk resource / persistent volume the figures below belong to -- |
| // confirm against the code that fills this message. |
| optional Resource.DiskInfo.Source source = 1; |
| optional Resource.DiskInfo.Persistence persistence = 2; |
| |
| // Limit and current usage, both in bytes. |
| optional uint64 limit_bytes = 3; |
| optional uint64 used_bytes = 4; |
| } |
| |
| |
| /** |
| * A snapshot of resource usage statistics. |
| */ |
| message ResourceStatistics { |
| // NOTE: Field numbers in this message are not in declaration order |
| // (e.g. `processes` = 30 is declared before `cpus_user_time_secs` = 2). |
| // The numbers identify the fields on the wire and must not be |
| // renumbered. Only `timestamp` is required. |
| |
| required double timestamp = 1; // Snapshot time, in seconds since the Epoch. |
| |
| // NOTE(review): Presumably the number of processes and threads that |
| // the snapshot covers -- confirm against the code that fills these. |
| optional uint32 processes = 30; |
| optional uint32 threads = 31; |
| |
| // CPU Usage Information: |
| // Total CPU time spent in user mode, and kernel mode. |
| optional double cpus_user_time_secs = 2; |
| optional double cpus_system_time_secs = 3; |
| |
| // Hard CPU limit. |
| optional double cpus_limit = 4; |
| |
| // Soft CPU limit. |
| optional double cpus_soft_limit = 45; |
| |
| // cpu.stat on process throttling (for contention issues). |
| optional uint32 cpus_nr_periods = 7; |
| optional uint32 cpus_nr_throttled = 8; |
| optional double cpus_throttled_time_secs = 9; |
| |
| // Memory Usage Information: |
| |
| // mem_total_bytes was added in 0.23.0 to represent the total memory |
| // of a process in RAM (as opposed to in Swap). This was previously |
| // reported as mem_rss_bytes, which was also changed in 0.23.0 to |
| // represent only the anonymous memory usage, to keep in sync with |
| // Linux kernel's (arguably erroneous) use of terminology. |
| optional uint64 mem_total_bytes = 36; |
| |
| // Total memory + swap usage. This is set if swap is enabled. |
| optional uint64 mem_total_memsw_bytes = 37; |
| |
| // Hard memory limit. |
| optional uint64 mem_limit_bytes = 6; |
| |
| // Soft memory limit. |
| optional uint64 mem_soft_limit_bytes = 38; |
| |
| // Broken out memory usage information: pagecache, rss (anonymous), |
| // mmaped files and swap. |
| |
| // TODO(chzhcn) mem_file_bytes and mem_anon_bytes are deprecated in |
| // 0.23.0 and will be removed in 0.24.0. |
| optional uint64 mem_file_bytes = 10; |
| optional uint64 mem_anon_bytes = 11; |
| |
| // mem_cache_bytes is added in 0.23.0 to represent page cache usage. |
| optional uint64 mem_cache_bytes = 39; |
| |
| // Since 0.23.0, mem_rss_bytes is changed to represent only |
| // anonymous memory usage. Note that neither its requiredness, type, |
| // name nor numeric tag has been changed. |
| optional uint64 mem_rss_bytes = 5; |
| |
| optional uint64 mem_mapped_file_bytes = 12; |
| // This is only set if swap is enabled. |
| optional uint64 mem_swap_bytes = 40; |
| optional uint64 mem_unevictable_bytes = 41; |
| |
| // Number of occurrences of different levels of memory pressure |
| // events reported by memory cgroup. Pressure listening (re)starts |
| // with these values set to 0 when agent (re)starts. See |
| // https://www.kernel.org/doc/Documentation/cgroups/memory.txt for |
| // more details. |
| optional uint64 mem_low_pressure_counter = 32; |
| optional uint64 mem_medium_pressure_counter = 33; |
| optional uint64 mem_critical_pressure_counter = 34; |
| |
| // Disk Usage Information for executor working directory. |
| optional uint64 disk_limit_bytes = 26; |
| optional uint64 disk_used_bytes = 27; |
| |
| // Per disk (resource) statistics. |
| repeated DiskStatistics disk_statistics = 43; |
| |
| // Cgroups blkio statistics. |
| optional CgroupInfo.Blkio.Statistics blkio_statistics = 44; |
| |
| // Perf statistics. |
| optional PerfStatistics perf = 13; |
| |
| // Network Usage Information: |
| // NOTE(review): `rx`/`tx` presumably stand for the receive and |
| // transmit directions -- confirm. |
| optional uint64 net_rx_packets = 14; |
| optional uint64 net_rx_bytes = 15; |
| optional uint64 net_rx_errors = 16; |
| optional uint64 net_rx_dropped = 17; |
| optional uint64 net_tx_packets = 18; |
| optional uint64 net_tx_bytes = 19; |
| optional uint64 net_tx_errors = 20; |
| optional uint64 net_tx_dropped = 21; |
| |
| // The kernel keeps track of RTT (round-trip time) for its TCP |
| // sockets. RTT is a way to tell the latency of a container. |
| // NOTE(review): The `pNN` suffixes presumably denote the NNth |
| // percentile of the RTT, in microseconds -- confirm. |
| optional double net_tcp_rtt_microsecs_p50 = 22; |
| optional double net_tcp_rtt_microsecs_p90 = 23; |
| optional double net_tcp_rtt_microsecs_p95 = 24; |
| optional double net_tcp_rtt_microsecs_p99 = 25; |
| |
| // NOTE: These two connection figures are typed `double`, unlike the |
| // integer counters above. The type is part of the wire contract and |
| // cannot be switched to an integer type without breaking readers. |
| optional double net_tcp_active_connections = 28; |
| optional double net_tcp_time_wait_connections = 29; |
| |
| // Network traffic flowing into or out of a container can be delayed |
| // or dropped due to congestion or policy inside and outside the |
| // container. |
| repeated TrafficControlStatistics net_traffic_control_statistics = 35; |
| |
| // Network SNMP statistics for each container. |
| optional SNMPStatistics net_snmp_statistics = 42; |
| } |
| |
| |
| /** |
| * Describes a snapshot of the resource usage for executors. |
| */ |
| message ResourceUsage { |
| // Usage record for a single executor and its non-terminal tasks. |
| message Executor { |
| // Required description of the executor this record is about. |
| required ExecutorInfo executor_info = 1; |
| |
| // This includes resources used by the executor itself |
| // as well as its active tasks. |
| repeated Resource allocated = 2; |
| |
| // Current resource usage. If absent, the containerizer |
| // cannot provide resource usage. |
| optional ResourceStatistics statistics = 3; |
| |
| // The container id for the executor specified in the executor_info field. |
| required ContainerID container_id = 4; |
| |
| // Summary of one task of the executor. |
| // NOTE: The task identifier field is named `id` here, whereas the |
| // top-level `TaskInfo` and `Task` messages call it `task_id`. |
| message Task { |
| required string name = 1; |
| required TaskID id = 2; |
| repeated Resource resources = 3; |
| optional Labels labels = 4; |
| } |
| |
| // Non-terminal tasks. |
| repeated Task tasks = 5; |
| } |
| |
| // One entry per executor covered by this snapshot. |
| repeated Executor executors = 1; |
| |
| // Agent's total resources including checkpointed dynamic |
| // reservations and persistent volumes. |
| repeated Resource total = 2; |
| } |
| |
| |
| /** |
| * Describes a sample of events from "perf stat". Only available on |
| * Linux. |
| * |
| * NOTE: Each optional field matches the name of a perf event (see |
| * "perf list") with the following changes: |
| * 1. Names are downcased. |
| * 2. Hyphens ('-') are replaced with underscores ('_'). |
| * 3. Events with alternate names use the name "perf stat" returns, |
| * e.g., for the event "cycles OR cpu-cycles" perf always returns |
| * cycles. |
| */ |
| message PerfStatistics { |
| // The sample interval is described by the two required fields below; |
| // every event counter after them is optional. |
| required double timestamp = 1; // Start of sample interval, in seconds since the Epoch. |
| required double duration = 2; // Duration of sample interval, in seconds. |
| |
| // Hardware event. |
| optional uint64 cycles = 3; |
| optional uint64 stalled_cycles_frontend = 4; |
| optional uint64 stalled_cycles_backend = 5; |
| optional uint64 instructions = 6; |
| optional uint64 cache_references = 7; |
| optional uint64 cache_misses = 8; |
| optional uint64 branches = 9; |
| optional uint64 branch_misses = 10; |
| optional uint64 bus_cycles = 11; |
| optional uint64 ref_cycles = 12; |
| |
| // Software event. |
| // NOTE: `cpu_clock` and `task_clock` are the only events typed |
| // `double`; all other events in this message are `uint64` counts. |
| // NOTE(review): The unit of the two clock values is not stated in |
| // this file -- confirm against "perf stat" output. |
| optional double cpu_clock = 13; |
| optional double task_clock = 14; |
| optional uint64 page_faults = 15; |
| optional uint64 minor_faults = 16; |
| optional uint64 major_faults = 17; |
| optional uint64 context_switches = 18; |
| optional uint64 cpu_migrations = 19; |
| optional uint64 alignment_faults = 20; |
| optional uint64 emulation_faults = 21; |
| |
| // Hardware cache event. |
| optional uint64 l1_dcache_loads = 22; |
| optional uint64 l1_dcache_load_misses = 23; |
| optional uint64 l1_dcache_stores = 24; |
| optional uint64 l1_dcache_store_misses = 25; |
| optional uint64 l1_dcache_prefetches = 26; |
| optional uint64 l1_dcache_prefetch_misses = 27; |
| optional uint64 l1_icache_loads = 28; |
| optional uint64 l1_icache_load_misses = 29; |
| optional uint64 l1_icache_prefetches = 30; |
| optional uint64 l1_icache_prefetch_misses = 31; |
| optional uint64 llc_loads = 32; |
| optional uint64 llc_load_misses = 33; |
| optional uint64 llc_stores = 34; |
| optional uint64 llc_store_misses = 35; |
| optional uint64 llc_prefetches = 36; |
| optional uint64 llc_prefetch_misses = 37; |
| optional uint64 dtlb_loads = 38; |
| optional uint64 dtlb_load_misses = 39; |
| optional uint64 dtlb_stores = 40; |
| optional uint64 dtlb_store_misses = 41; |
| optional uint64 dtlb_prefetches = 42; |
| optional uint64 dtlb_prefetch_misses = 43; |
| optional uint64 itlb_loads = 44; |
| optional uint64 itlb_load_misses = 45; |
| optional uint64 branch_loads = 46; |
| optional uint64 branch_load_misses = 47; |
| optional uint64 node_loads = 48; |
| optional uint64 node_load_misses = 49; |
| optional uint64 node_stores = 50; |
| optional uint64 node_store_misses = 51; |
| optional uint64 node_prefetches = 52; |
| optional uint64 node_prefetch_misses = 53; |
| } |
| |
| |
| /** |
| * Represents filters that allow a framework to control the shape of |
| * offers that will be sent to its role(s). These filters apply |
| * globally to any agent (unlike the existing `DECLINE` filter which |
| * is a time-based resource subset filter that only applies to the |
| * agent that was declined). |
| * |
| * NOTE: Custom allocators might interpret these fields in a different |
| * way, or not at all. |
| */ |
| message OfferFilters { |
| // One set of named scalar quantities. |
| message ResourceQuantities { |
| // Quantities are pairs of identifiers of scalar resources and |
| // an associated value, e.g., `{"disk": Scalar {"value": 30}}`. |
| map<string, Value.Scalar> quantities = 1; |
| } |
| |
| // A list of alternative `ResourceQuantities`; see the field comment |
| // below for how the alternatives are combined. |
| message MinAllocatableResources { |
| // A set of resources is considered allocatable if contained in any of |
| // the following quantities. If no quantities are specified any resource |
| // is considered allocatable. |
| repeated ResourceQuantities quantities = 1; |
| } |
| |
| // Optional. The only filter defined in this message. |
| optional MinAllocatableResources min_allocatable_resources = 1; |
| } |
| |
| |
| /** |
| * Describes a request for resources that can be used by a framework |
| * to proactively influence the allocator. If 'agent_id' is provided |
| * then this request is assumed to only apply to resources on that |
| * agent. |
| */ |
| message Request { |
| // If provided, the request is assumed to only apply to resources on |
| // this agent. |
| optional AgentID agent_id = 1; |
| |
| // The resources being requested. |
| repeated Resource resources = 2; |
| } |
| |
| |
| /** |
| * Describes some resources available on an agent. An offer only |
| * contains resources from a single agent. |
| */ |
| message Offer { |
| // The four identifying fields below are all required. An offer only |
| // contains resources from a single agent, named by `agent_id`. |
| // NOTE(review): `framework_id` presumably names the framework the |
| // offer is made to, and `hostname` the agent's host -- confirm. |
| required OfferID id = 1; |
| required FrameworkID framework_id = 2; |
| required AgentID agent_id = 3; |
| required string hostname = 4; |
| |
| // URL for reaching the agent running on the host. |
| optional URL url = 8; |
| |
| // The domain of the agent. |
| optional DomainInfo domain = 11; |
| |
| // The resources contained in this offer. |
| // NOTE(review): `attributes` are presumably those of the agent -- |
| // confirm. |
| repeated Resource resources = 5; |
| repeated Attribute attributes = 7; |
| |
| // Executors of the same framework running on this agent. |
| repeated ExecutorID executor_ids = 6; |
| |
| // Signifies that the resources in this Offer may be unavailable during |
| // the given interval. Any tasks launched using these resources may be |
| // killed when the interval arrives. For example, these resources may be |
| // part of a planned maintenance schedule. |
| // |
| // This field only provides information about a planned unavailability. |
| // The unavailability interval may not necessarily start at exactly this |
| // interval, nor last for exactly the duration of this interval. |
| // The unavailability may also be forever! See comments in |
| // `Unavailability` for more details. |
| optional Unavailability unavailability = 9; |
| |
| // An offer represents resources allocated to *one* of the |
| // roles managed by the scheduler. (Therefore, each |
| // `Offer.resources[i].allocation_info` will match the |
| // top level `Offer.allocation_info`). |
| optional Resource.AllocationInfo allocation_info = 10; |
| |
| // Defines an operation that can be performed against offers. |
| message Operation { |
| // Kind of operation. The numeric values are not in declaration |
| // order (e.g. LAUNCH_GROUP = 6 follows LAUNCH = 1) and no values |
| // 7-10 are declared here; the numbers are wire identifiers and must |
| // not be changed. |
| enum Type { |
| UNKNOWN = 0; |
| LAUNCH = 1; |
| LAUNCH_GROUP = 6; |
| RESERVE = 2; |
| UNRESERVE = 3; |
| CREATE = 4; |
| DESTROY = 5; |
| GROW_VOLUME = 11; // EXPERIMENTAL. |
| SHRINK_VOLUME = 12; // EXPERIMENTAL. |
| CREATE_DISK = 13; // EXPERIMENTAL. |
| DESTROY_DISK = 14; // EXPERIMENTAL. |
| } |
| |
| // The payload messages below are named after the `Type` values. |
| |
| // TODO(vinod): Deprecate this in favor of `LaunchGroup` below. |
| message Launch { |
| repeated TaskInfo task_infos = 1; |
| } |
| |
| // Unlike `Launch` above, all the tasks in a `task_group` are |
| // atomically delivered to an executor. |
| // |
| // `NetworkInfo` set on executor will be shared by all tasks in |
| // the task group. |
| // |
| // TODO(vinod): Any volumes set on executor could be used by a |
| // task by explicitly setting `Volume.source` in its resources. |
| message LaunchGroup { |
| required ExecutorInfo executor = 1; |
| required TaskGroupInfo task_group = 2; |
| } |
| |
| // NOTE: `source` (tag 2) is declared before `resources` (tag 1). |
| // NOTE(review): The meaning of `source` is not documented in this |
| // file; presumably it lists the resources the reservation is made |
| // from, with `resources` being the reserved result -- confirm. |
| message Reserve { |
| repeated Resource source = 2; |
| repeated Resource resources = 1; |
| } |
| |
| // NOTE(review): Presumably the reserved resources to release -- |
| // confirm. |
| message Unreserve { |
| repeated Resource resources = 1; |
| } |
| |
| // NOTE(review): Presumably the volumes to create -- confirm. |
| message Create { |
| repeated Resource volumes = 1; |
| } |
| |
| // NOTE(review): Presumably the volumes to destroy -- confirm. |
| message Destroy { |
| repeated Resource volumes = 1; |
| } |
| |
| // Grow a volume by an additional disk resource. |
| // NOTE: This is currently experimental and only for persistent volumes |
| // created on ROOT/PATH disk. |
| message GrowVolume { |
| required Resource volume = 1; |
| required Resource addition = 2; |
| } |
| |
| // Shrink a volume by the size specified in the `subtract` field. |
| // NOTE: This is currently experimental and only for persistent volumes |
| // created on ROOT/PATH disk. |
| message ShrinkVolume { |
| required Resource volume = 1; |
| |
| // See comments in `Value.Scalar` for maximum precision supported. |
| required Value.Scalar subtract = 2; |
| } |
| |
| // Create a `MOUNT` or `BLOCK` disk resource backed by a CSI volume from a |
| // `RAW` disk resource. |
| // |
| // In the typical case where the `RAW` disk resource has a profile and no |
| // source ID, a new CSI volume will be provisioned by Mesos to back the |
| // returned `MOUNT` or `BLOCK` disk resource. However, the `RAW` disk |
| // resource can instead have no profile but a source ID, indicating that |
| // it is already backed by a CSI volume in one of the following scenarios: |
| // |
| // (1) The CSI volume is preprovisioned out-of-band. |
| // |
| // (2) The CSI volume is provisioned by Mesos, but Mesos has lost the |
| // corresponding `MOUNT` or `BLOCK` resource metadata. This could |
| // happen if there has been a change in the agent ID or resource |
| // provider ID where the volume belongs. |
| // |
| // In the above cases, Mesos won't provision a new CSI volume, but instead |
| // will simply return a `MOUNT` or `BLOCK` disk resource backed by the same |
| // CSI volume, with the profile specified in this call. |
| // |
| // NOTE: For the time being, this API is subject to change and the related |
| // feature is experimental. |
| message CreateDisk { |
| required Resource source = 1; |
| |
| // NOTE: Only `MOUNT` or `BLOCK` is allowed in this field. |
| required Resource.DiskInfo.Source.Type target_type = 2; |
| |
| // Apply the specified profile to the created disk. This field must be set |
| // if `source` does not have a profile, and must not be set if it has one. |
| // |
| // NOTE: The operation will fail if the specified profile is unknown to |
| // Mesos, i.e., not reported by the disk profile adaptor. |
| optional string target_profile = 3; |
| } |
| |
| // Destroy a disk resource backed by a CSI volume. |
| // |
| // In the typical case where the CSI plugin of the volume supports volume |
| // deprovisioning and the disk resource is a `MOUNT` or `BLOCK` disk with a |
| // profile known to Mesos, the volume will be deprovisioned and a `RAW` disk |
| // resource with the same profile but no source ID will be returned. |
| // However, the following scenarios could lead to different outcomes: |
| // |
| // (1) If the CSI plugin supports volume deprovisioning but the profile of |
| // the disk resource is unknown to the disk profile adaptor, or the disk |
| // resource is a `RAW` disk with no profile but a source ID (see above |
| // for possible scenarios), the volume will be deprovisioned but no |
| // resource will be returned. |
| // |
| // (2) If the CSI plugin does not support volume deprovisioning, the volume |
| // won't be deprovisioned and a `RAW` disk resource with no profile but |
| // the same source ID will be returned. |
| // |
| // NOTE: For the time being, this API is subject to change and the related |
| // feature is experimental. |
| message DestroyDisk { |
| // NOTE: Only a `MOUNT`, `BLOCK` or `RAW` disk is allowed in this field. |
| required Resource source = 1; |
| } |
| |
| |
| // Discriminator for the payload fields below. |
| optional Type type = 1; |
| |
| // The `id` field allows frameworks to indicate that they wish to receive |
| // feedback about an operation via the UPDATE_OPERATION_STATUS event in the |
| // v1 scheduler API. |
| optional OperationID id = 12; // EXPERIMENTAL. |
| |
| // Payload fields, one per operation type. |
| // NOTE(review): These are independent optional fields rather than a |
| // `oneof`, so the schema itself does not stop several from being set; |
| // presumably only the one matching `type` is meaningful -- confirm. |
| optional Launch launch = 2; |
| optional LaunchGroup launch_group = 7; |
| optional Reserve reserve = 3; |
| optional Unreserve unreserve = 4; |
| optional Create create = 5; |
| optional Destroy destroy = 6; |
| optional GrowVolume grow_volume = 13; // EXPERIMENTAL. |
| optional ShrinkVolume shrink_volume = 14; // EXPERIMENTAL. |
| optional CreateDisk create_disk = 15; // EXPERIMENTAL. |
| optional DestroyDisk destroy_disk = 16; // EXPERIMENTAL. |
| } |
| } |
| |
| |
| /** |
| * A request to return some resources occupied by a framework. |
| */ |
| message InverseOffer { |
| // NOTE: `id`, `framework_id` and `unavailability` are required; |
| // `url`, `agent_id` and `resources` may be left unset. Field numbers |
| // differ from `Offer` (e.g. `url` is tag 2 here but tag 8 there), so |
| // the two messages are not wire-interchangeable. |
| |
| // This is the same OfferID as found in normal offers, which allows |
| // re-use of some of the OfferID-only messages. |
| required OfferID id = 1; |
| |
| // URL for reaching the agent running on the host. This enables some |
| // optimizations as described in MESOS-3012, such as allowing the |
| // scheduler driver to bypass the master and talk directly with an agent. |
| optional URL url = 2; |
| |
| // The framework that should release its resources. |
| // If no specifics are provided (i.e. which agent), all the framework's |
| // resources are requested back. |
| required FrameworkID framework_id = 3; |
| |
| // Specified if the resources need to be released from a particular agent. |
| // All the framework's resources on this agent are requested back, |
| // unless further qualified by the `resources` field. |
| optional AgentID agent_id = 4; |
| |
| // This InverseOffer represents a planned unavailability event in the |
| // specified interval. Any tasks running on the given framework or agent |
| // may be killed when the interval arrives. Therefore, frameworks should |
| // aim to gracefully terminate tasks prior to the arrival of the interval. |
| // |
| // For reserved resources, the resources are expected to be returned to the |
| // framework after the unavailability interval. This is an expectation, |
| // not a guarantee. For example, if the unavailability duration is not set, |
| // the resources may be removed permanently. |
| // |
| // For other resources, there is no guarantee that requested resources will |
| // be returned after the unavailability interval. The allocator has no |
| // obligation to re-offer these resources to the prior framework after |
| // the unavailability. |
| required Unavailability unavailability = 5; |
| |
| // A list of resources being requested back from the framework, |
| // on the agent identified by `agent_id`. If no resources are specified |
| // then all resources are being requested back. For the purpose of |
| // maintenance, this field is always empty (maintenance always requests |
| // all resources back). |
| repeated Resource resources = 6; |
| |
| // TODO(josephw): Add additional options for narrowing down the resources |
| // being requested back. Such as specific executors, tasks, etc. |
| } |
| |
| |
| /** |
| * Describes a task. Passed from the scheduler all the way to an |
| * executor (see SchedulerDriver::launchTasks and |
| * Executor::launchTask). Either ExecutorInfo or CommandInfo should be set. |
| * A different executor can be used to launch this task, and subsequent tasks |
| * meant for the same executor can reuse the same ExecutorInfo struct. |
| */ |
| message TaskInfo { |
| // Required identification of the task. |
| // NOTE(review): `agent_id` presumably names the agent the task is to |
| // be launched on -- confirm. |
| required string name = 1; |
| required TaskID task_id = 2; |
| required AgentID agent_id = 3; |
| |
| // NOTE(review): Presumably the resources the task will use -- confirm. |
| repeated Resource resources = 4; |
| |
| // Either `executor` or `command` should be set. Both are declared |
| // optional, so the schema itself does not enforce this. |
| optional ExecutorInfo executor = 5; |
| optional CommandInfo command = 7; |
| |
| // Task provided with a container will launch the container as part |
| // of this task paired with the task's CommandInfo. |
| optional ContainerInfo container = 9; |
| |
| // A health check for the task. Implemented for executor-less |
| // command-based tasks. For tasks that specify an executor, it is |
| // the executor's responsibility to implement the health checking. |
| optional HealthCheck health_check = 8; |
| |
| // A general check for the task. Implemented for all built-in executors. |
| // For tasks that specify an executor, it is the executor's responsibility |
| // to implement checking support. Executors should (all built-in executors |
| // will) neither interpret nor act on the check's result. |
| // |
| // NOTE: Check support in built-in executors is experimental. |
| // |
| // TODO(alexr): Consider supporting multiple checks per task. |
| optional CheckInfo check = 13; |
| |
| // A kill policy for the task. Implemented for executor-less |
| // command-based and docker tasks. For tasks that specify an |
| // executor, it is the executor's responsibility to implement |
| // the kill policy. |
| optional KillPolicy kill_policy = 12; |
| |
| // Free-form bytes attached to the task. |
| // NOTE(review): Presumably passed through to the executor without |
| // being interpreted by Mesos -- confirm. |
| optional bytes data = 6; |
| |
| // Labels are free-form key value pairs which are exposed through |
| // master and agent endpoints. Labels will not be interpreted or |
| // acted upon by Mesos itself. As opposed to the data field, labels |
| // will be kept in memory on master and agent processes. Therefore, |
| // labels should be used to tag tasks with light-weight meta-data. |
| // Labels should not contain duplicate key-value pairs. |
| optional Labels labels = 10; |
| |
| // Service discovery information for the task. It is not interpreted |
| // or acted upon by Mesos. It is up to a service discovery system |
| // to use this information as needed and to handle tasks without |
| // service discovery information. |
| optional DiscoveryInfo discovery = 11; |
| |
| // Maximum duration for task completion. If the task is non-terminal at the |
| // end of this duration, it will fail with the reason |
| // `REASON_MAX_COMPLETION_TIME_REACHED`. Mesos supports this field for |
| // executor-less tasks, and tasks that use Docker or default executors. |
| // It is the executor's responsibility to implement this, so it might not be |
| // supported by all custom executors. |
| optional DurationInfo max_completion_time = 14; |
| |
| // Resource limits associated with the task. |
| // NOTE(review): The map keys are presumably resource names (such as |
| // "cpus" or "mem") -- confirm. |
| map<string, Value.Scalar> limits = 15; |
| } |
| |
| |
| /** |
| * Describes a group of tasks that belong to an executor. The |
| * executor will receive the task group in a single message to |
| * allow the group to be launched "atomically". |
| * |
| * NOTES: |
| * 1) `NetworkInfo` must not be set inside task's `ContainerInfo`. |
| * 2) `TaskInfo.executor` doesn't need to be set. If set, it should match |
| * `LaunchGroup.executor`. |
| */ |
| message TaskGroupInfo { |
| // The tasks making up the group. The executor receives the whole |
| // group in a single message. |
| repeated TaskInfo tasks = 1; |
| } |
| |
| |
| // TODO(bmahler): Add executor_uuid here, and send it to the master. This will |
| // allow us to expose executor work directories for tasks in the webui when |
| // looking from the master level. Currently only the agent knows which run the |
| // task belongs to. |
| /** |
| * Describes a task, similar to `TaskInfo`. |
| * |
| * `Task` is used in some of the Mesos messages found below. |
| * `Task` is used instead of `TaskInfo` if: |
| * 1) we need additional IDs, such as a specific |
| * framework, executor, or agent; or |
| * 2) we do not need the additional data, such as the command run by the |
| * task. These additional fields may be large and unnecessary for some |
| * Mesos messages. |
| * |
| * `Task` is generally constructed from a `TaskInfo`. See protobuf::createTask. |
| */ |
| message Task { |
| // NOTE: `executor_id` is the only optional identifier here; `task_id`, |
| // `framework_id` and `agent_id` are required. |
| required string name = 1; |
| required TaskID task_id = 2; |
| required FrameworkID framework_id = 3; |
| optional ExecutorID executor_id = 4; |
| required AgentID agent_id = 5; |
| required TaskState state = 6; // Latest state of the task. |
| repeated Resource resources = 7; |
| |
| // NOTE(review): Presumably the status updates recorded for this |
| // task -- confirm. |
| repeated TaskStatus statuses = 8; |
| |
| // These fields correspond to the state and uuid of the latest |
| // status update forwarded to the master. |
| // NOTE: Either both the fields must be set or both must be unset. |
| optional TaskState status_update_state = 9; |
| optional bytes status_update_uuid = 10; |
| |
| optional Labels labels = 11; |
| |
| // Service discovery information for the task. It is not interpreted |
| // or acted upon by Mesos. It is up to a service discovery system |
| // to use this information as needed and to handle tasks without |
| // service discovery information. |
| optional DiscoveryInfo discovery = 12; |
| |
| // Container information for the task. |
| optional ContainerInfo container = 13; |
| |
| optional HealthCheck health_check = 15; |
| |
| // TODO(greggomann): Add the task's `CheckInfo`. See MESOS-8780. |
| |
| // The kill policy used for this task when it is killed. It's possible for |
| // this policy to be overridden by the scheduler when killing the task. |
| optional KillPolicy kill_policy = 16; |
| |
| // Specific user under which task is running. |
| optional string user = 14; |
| |
| // Resource limits associated with the task. |
| // NOTE(review): The map keys are presumably resource names (such as |
| // "cpus" or "mem") -- confirm. |
| map<string, Value.Scalar> limits = 17; |
| } |
| |
| |
| /** |
| * Describes possible task states. IMPORTANT: Mesos assumes tasks that |
| * enter terminal states (see below) imply the task is no longer |
| * running and thus clean up anything associated with the task |
| * (ultimately offering any resources being consumed by that task to |
| * another task). |
| */ |
| enum TaskState { |
| // NOTE: The values are not declared in numeric order. The numbers |
| // identify the states on the wire and must not be changed. Because |
| // this is a proto2 file and TASK_STAGING is declared first, it is the |
| // default for an unset `TaskState` field (even though TASK_STARTING |
| // has the numeric value 0). |
| |
| TASK_STAGING = 6; // Initial state. Framework status updates should not use. |
| TASK_STARTING = 0; // The task is being launched by the executor. |
| TASK_RUNNING = 1; |
| |
| // NOTE: This should only be sent when the framework has |
| // the TASK_KILLING_STATE capability. |
| TASK_KILLING = 8; // The task is being killed by the executor. |
| |
| // The task finished successfully on its own without external interference. |
| TASK_FINISHED = 2; // TERMINAL. |
| |
| TASK_FAILED = 3; // TERMINAL: The task failed to finish successfully. |
| TASK_KILLED = 4; // TERMINAL: The task was killed by the executor. |
| TASK_ERROR = 7; // TERMINAL: The task description contains an error. |
| |
| // In Mesos 1.3, this will only be sent when the framework does NOT |
| // opt-in to the PARTITION_AWARE capability. |
| // |
| // NOTE: This state is not always terminal. For example, tasks might |
| // transition from TASK_LOST to TASK_RUNNING or other states when a |
| // partitioned agent reregisters. |
| TASK_LOST = 5; // The task failed but can be rescheduled. |
| |
| // The following task states are only sent when the framework |
| // opts-in to the PARTITION_AWARE capability. |
| |
| // The task failed to launch because of a transient error. The |
| // task's executor never started running. Unlike TASK_ERROR, the |
| // task description is valid -- attempting to launch the task again |
| // may be successful. |
| TASK_DROPPED = 9; // TERMINAL. |
| |
| // The task was running on an agent that has lost contact with the |
| // master, typically due to a network failure or partition. The task |
| // may or may not still be running. |
| TASK_UNREACHABLE = 10; |
| |
| // The task is no longer running. This can occur if the agent has |
| // been terminated along with all of its tasks (e.g., the host that |
| // was running the agent was rebooted). It might also occur if the |
| // task was terminated due to an agent or containerizer error, or if |
| // the task was preempted by the QoS controller in an |
| // oversubscription scenario. |
| TASK_GONE = 11; // TERMINAL. |
| |
| // The task was running on an agent that the master cannot contact; |
| // the operator has asserted that the agent has been shutdown, but |
| // this has not been directly confirmed by the master. If the |
| // operator is correct, the task is not running and this is a |
| // terminal state; if the operator is mistaken, the task may still |
| // be running and might return to RUNNING in the future. |
| TASK_GONE_BY_OPERATOR = 12; |
| |
| // The master has no knowledge of the task. This is typically |
| // because either (a) the master never had knowledge of the task, or |
| // (b) the master forgot about the task because it garbage collected |
| // its metadata about the task. The task may or may not still be |
| // running. |
| TASK_UNKNOWN = 13; |
| } |
| |
| |
| /** |
| * Describes a resource limitation that caused a task failure. |
| */ |
| message TaskResourceLimitation { |
| // The resource(s) whose limits were violated. |
| // |
| // NOTE: A repeated `Resource` is used here (rather than a single |
| // entry) because the violated resource may span multiple roles |
| // (e.g. `"mem(*):1;mem(role):2"`). |
| repeated Resource resources = 1; |
| } |
| |
| |
| /** |
| * A 128 bit (16 byte) UUID, see RFC 4122. |
| */ |
| message UUID { |
| // Raw bytes of the UUID. The message description states 16 bytes; |
| // note that the `bytes` type itself does not constrain the length. |
| required bytes value = 1; |
| } |
| |
| |
| /** |
| * Describes an operation, similar to `Offer.Operation`, with |
| * some additional information. |
| */ |
| message Operation { |
| // ID of the framework associated with this operation, if set. |
| optional FrameworkID framework_id = 1; |
| // ID of the agent associated with this operation, if set. |
| optional AgentID agent_id = 2; |
| // The operation itself, in the same form as `Offer.Operation`. |
| required Offer.Operation info = 3; |
| // NOTE(review): presumably the most recent entry of `statuses` below; |
| // confirm against the code that populates this message. |
| required OperationStatus latest_status = 4; |
| |
| // All the statuses known to this operation. Some of the statuses in this |
| // list might not have been acknowledged yet. The statuses are ordered. |
| repeated OperationStatus statuses = 5; |
| |
| // This is the internal UUID for the operation, which is kept independently |
| // from the framework-specified operation ID, which is optional. |
| required UUID uuid = 6; |
| } |
| |
| |
| /** |
| * Describes possible operation states. |
| */ |
| enum OperationState { |
| // NOTE: Only the states explicitly marked "TERMINAL" below are |
| // documented as terminal. |
| |
| // Default value if the enum is not set. See MESOS-4997. |
| OPERATION_UNSUPPORTED = 0; |
| |
| // Initial state. |
| OPERATION_PENDING = 1; |
| |
| // TERMINAL: The operation was successfully applied. |
| OPERATION_FINISHED = 2; |
| |
| // TERMINAL: The operation failed to apply. |
| OPERATION_FAILED = 3; |
| |
| // TERMINAL: The operation description contains an error. |
| OPERATION_ERROR = 4; |
| |
| // TERMINAL: The operation was dropped due to a transient error. |
| OPERATION_DROPPED = 5; |
| |
| // The operation affects an agent that has lost contact with the master, |
| // typically due to a network failure or partition. The operation may or may |
| // not still be pending. |
| OPERATION_UNREACHABLE = 6; |
| |
| // The operation affected an agent that the master cannot contact; |
| // the operator has asserted that the agent has been shut down, but this has |
| // not been directly confirmed by the master. |
| // |
| // If the operator is correct, the operation is not pending and this is a |
| // terminal state; if the operator is mistaken, the operation may still be |
| // pending and might return to a different state in the future. |
| OPERATION_GONE_BY_OPERATOR = 7; |
| |
| // The operation affects an agent that the master recovered from its |
| // state, but that agent has not yet re-registered. |
| // |
| // The operation can transition to `OPERATION_UNREACHABLE` if the |
| // corresponding agent is marked as unreachable, and will transition to |
| // another status if the agent re-registers. |
| OPERATION_RECOVERING = 8; |
| |
| // The master has no knowledge of the operation. This is typically |
| // because either (a) the master never had knowledge of the operation, or |
| // (b) the master forgot about the operation because it garbage collected |
| // its metadata about the operation. The operation may or may not still be |
| // pending. |
| OPERATION_UNKNOWN = 9; |
| } |
| |
| |
| /** |
| * Describes the current status of an operation. |
| */ |
| message OperationStatus { |
| // While frameworks will only receive status updates for operations on which |
| // they have set an ID, this field is optional because this message is also |
| // used internally by Mesos components when the operation's ID has not been |
| // set. |
| optional OperationID operation_id = 1; |
| |
| // The state of the operation reported by this status. |
| required OperationState state = 2; |
| // NOTE(review): presumably a human-readable explanation of `state`, as |
| // with `TaskStatus.message` -- confirm. |
| optional string message = 3; |
| |
| // Converted resources after applying the operation. This only |
| // applies if the `state` is `OPERATION_FINISHED`. |
| repeated Resource converted_resources = 4; |
| |
| // Statuses that are delivered reliably to the scheduler will |
| // include a `uuid`. The status is considered delivered once |
| // it is acknowledged by the scheduler. |
| optional UUID uuid = 5; |
| |
| // If the operation affects resources from a local resource provider, |
| // both `agent_id` and `resource_provider_id` will be set. |
| // |
| // If the operation affects resources that belong to an external |
| // resource provider, only `resource_provider_id` will be set. |
| // |
| // In certain cases, e.g., invalid operations, neither `uuid`, |
| // `agent_id` nor `resource_provider_id` will be set, and the |
| // scheduler does not need to acknowledge this status update. |
| optional AgentID agent_id = 6; |
| optional ResourceProviderID resource_provider_id = 7; |
| } |
| |
| |
| /** |
| * Describes the status of a check. Type and the corresponding field, i.e., |
| * `command` or `http` must be set. If the result of the check is not available |
| * (e.g., the check timed out), these fields must contain empty messages, i.e., |
| * `exit_code` or `status_code` will be unset. |
| * |
| * NOTE: This API is subject to change and the related feature is experimental. |
| */ |
| message CheckStatusInfo { |
| // Result of a command check. Left empty (i.e., `exit_code` unset) when |
| // the result of the check is not available. |
| message Command { |
| // Exit code of a command check. It is the result of calling |
| // `WEXITSTATUS()` on `waitpid()` termination information on |
| // Posix and calling `GetExitCodeProcess()` on Windows. |
| optional int32 exit_code = 1; |
| } |
| |
| // Result of an HTTP check. Left empty (i.e., `status_code` unset) when |
| // the result of the check is not available. |
| message Http { |
| // HTTP status code of an HTTP check. |
| optional uint32 status_code = 1; |
| } |
| |
| // Result of a TCP check. |
| // NOTE(review): presumably `succeeded` is likewise left unset when the |
| // result is not available -- confirm. |
| message Tcp { |
| // Whether a TCP connection succeeded. |
| optional bool succeeded = 1; |
| } |
| |
| // TODO(alexr): Consider adding a `data` field, which can contain, e.g., |
| // truncated stdout/stderr output for command checks or HTTP response body |
| // for HTTP checks. Alternatively, it can be an even shorter `message` field |
| // containing the last line of stdout or Reason-Phrase of the status line of |
| // the HTTP response. |
| |
| // The type of the check this status corresponds to. |
| optional CheckInfo.Type type = 1; |
| |
| // Status of a command check. |
| optional Command command = 2; |
| |
| // Status of an HTTP check. |
| optional Http http = 3; |
| |
| // Status of a TCP check. |
| optional Tcp tcp = 4; |
| |
| // TODO(alexr): Consider introducing a "last changed at" timestamp, since |
| // task status update's timestamp may not correspond to the last check's |
| // state, e.g., for reconciliation. |
| |
| // TODO(alexr): Consider introducing a `reason` enum here to explicitly |
| // distinguish between completed, delayed, and timed out checks. |
| } |
| |
| |
| /** |
| * Describes the current status of a task. |
| */ |
| message TaskStatus { |
| // Describes the source of the task status update. |
| enum Source { |
| SOURCE_MASTER = 0; |
| SOURCE_AGENT = 1; |
| SOURCE_EXECUTOR = 2; |
| } |
| |
| // Detailed reason for the task status update. |
| // Refer to docs/task-state-reasons.md for additional explanation. |
| // |
| // NOTE: The values below are listed roughly by name, not by number. The |
| // numbers are what is encoded on the wire, so they must not be changed |
| // or reused when tidying up this list. |
| enum Reason { |
| // TODO(jieyu): The default value when a caller doesn't check for |
| // presence is 0 and so ideally the 0 reason is not a valid one. |
| // Since this is not used anywhere, consider removing this reason. |
| REASON_COMMAND_EXECUTOR_FAILED = 0; |
| |
| REASON_CONTAINER_LAUNCH_FAILED = 21; |
| REASON_CONTAINER_LIMITATION = 19; |
| REASON_CONTAINER_LIMITATION_DISK = 20; |
| REASON_CONTAINER_LIMITATION_MEMORY = 8; |
| REASON_CONTAINER_PREEMPTED = 17; |
| REASON_CONTAINER_UPDATE_FAILED = 22; |
| REASON_MAX_COMPLETION_TIME_REACHED = 33; |
| REASON_EXECUTOR_REGISTRATION_TIMEOUT = 23; |
| REASON_EXECUTOR_REREGISTRATION_TIMEOUT = 24; |
| REASON_EXECUTOR_TERMINATED = 1; |
| REASON_EXECUTOR_UNREGISTERED = 2; // No longer used. |
| REASON_FRAMEWORK_REMOVED = 3; |
| REASON_GC_ERROR = 4; |
| REASON_INVALID_FRAMEWORKID = 5; |
| REASON_INVALID_OFFERS = 6; |
| REASON_IO_SWITCHBOARD_EXITED = 27; |
| REASON_MASTER_DISCONNECTED = 7; |
| REASON_RECONCILIATION = 9; |
| REASON_RESOURCES_UNKNOWN = 18; |
| REASON_AGENT_DISCONNECTED = 10; |
| REASON_AGENT_DRAINING = 34; |
| REASON_AGENT_REMOVED = 11; |
| REASON_AGENT_REMOVED_BY_OPERATOR = 31; |
| REASON_AGENT_REREGISTERED = 32; |
| REASON_AGENT_RESTARTED = 12; |
| REASON_AGENT_UNKNOWN = 13; |
| REASON_TASK_KILLED_DURING_LAUNCH = 30; |
| REASON_TASK_CHECK_STATUS_UPDATED = 28; |
| REASON_TASK_HEALTH_CHECK_STATUS_UPDATED = 29; |
| REASON_TASK_GROUP_INVALID = 25; |
| REASON_TASK_GROUP_UNAUTHORIZED = 26; |
| REASON_TASK_INVALID = 14; |
| REASON_TASK_UNAUTHORIZED = 15; |
| REASON_TASK_UNKNOWN = 16; |
| } |
| |
| // ID of the task this status refers to. |
| required TaskID task_id = 1; |
| // State of the task reported by this status. |
| required TaskState state = 2; |
| optional string message = 4; // Possible message explaining state. |
| // Source of, and detailed reason for, this update (see the enums above). |
| optional Source source = 9; |
| optional Reason reason = 10; |
| // NOTE(review): presumably an opaque payload attached to the update; the |
| // `labels` comment below contrasts it with labels, which are kept in |
| // memory on master and agent -- confirm. |
| optional bytes data = 3; |
| optional AgentID agent_id = 5; |
| optional ExecutorID executor_id = 7; // TODO(benh): Use in master/agent. |
| // NOTE(review): units are not stated here -- presumably seconds since |
| // the epoch; confirm. |
| optional double timestamp = 6; |
| |
| // Statuses that are delivered reliably to the scheduler will |
| // include a 'uuid'. The status is considered delivered once |
| // it is acknowledged by the scheduler. Schedulers can choose |
| // to either explicitly acknowledge statuses or let the scheduler |
| // driver implicitly acknowledge (default). |
| // |
| // TODO(bmahler): This is currently overwritten in the scheduler |
| // driver and executor driver, but executors will need to set this |
| // to a valid RFC-4122 UUID if using the HTTP API. |
| optional bytes uuid = 11; |
| |
| // Describes whether the task has been determined to be healthy (true) or |
| // unhealthy (false) according to the `health_check` field in `TaskInfo`. |
| optional bool healthy = 8; |
| |
| // Contains check status for the check specified in the corresponding |
| // `TaskInfo`. If no check has been specified, this field must be |
| // absent, otherwise it must be present even if the check status is |
| // not available yet. If the status update is triggered for a different |
| // reason than `REASON_TASK_CHECK_STATUS_UPDATED`, this field will contain |
| // the last known value. |
| // |
| // NOTE: A check-related task status update is triggered if and only if |
| // the value or presence of any field in `CheckStatusInfo` changes. |
| // |
| // NOTE: Check support in built-in executors is experimental. |
| optional CheckStatusInfo check_status = 15; |
| |
| // Labels are free-form key value pairs which are exposed through |
| // master and agent endpoints. Labels will not be interpreted or |
| // acted upon by Mesos itself. As opposed to the data field, labels |
| // will be kept in memory on master and agent processes. Therefore, |
| // labels should be used to tag TaskStatus message with light-weight |
| // meta-data. Labels should not contain duplicate key-value pairs. |
| optional Labels labels = 12; |
| |
| // Container related information that is resolved dynamically such as |
| // network address. |
| optional ContainerStatus container_status = 13; |
| |
| // The time (according to the master's clock) when the agent where |
| // this task was running became unreachable. This is only set on |
| // status updates for tasks running on agents that are unreachable |
| // (e.g., partitioned away from the master). |
| optional TimeInfo unreachable_time = 14; |
| |
| // If the reason field indicates a container resource limitation, |
| // this field optionally contains additional information. |
| optional TaskResourceLimitation limitation = 16; |
| } |
| |
| |
| /** |
| * Describes possible filters that can be applied to unused resources |
| * (see SchedulerDriver::launchTasks) to influence the allocator. |
| */ |
| message Filters { |
| // Time to consider unused resources refused. Note that all unused |
| // resources will be considered refused and use the default value |
| // (below) regardless of whether Filters was passed to |
| // SchedulerDriver::launchTasks. You MUST pass Filters with this |
| // field set to change this behavior (i.e., get another offer which |
| // includes unused resources sooner or later than the default). |
| // |
| // If this field is set to a number of seconds greater than 31536000 |
| // (365 days), then the resources will be considered refused for 365 |
| // days. If it is set to a negative number, then the default value |
| // will be used. |
| // |
| // The value is in seconds; the default is 5.0. |
| optional double refuse_seconds = 1 [default = 5.0]; |
| } |
| |
| |
| /** |
| * Describes a collection of environment variables. This is used with |
| * CommandInfo in order to set environment variables before running a |
| * command. The contents of each variable may be specified as a string |
| * or a Secret; only one of `value` and `secret` must be set. |
| */ |
| message Environment { |
| // A single environment variable. |
| message Variable { |
| // Name of the environment variable. |
| required string name = 1; |
| |
| // How the contents of the variable are specified. |
| enum Type { |
| UNKNOWN = 0; |
| VALUE = 1; // Contents are given as a string in `value`. |
| SECRET = 2; // Contents are given as a `Secret` in `secret`. |
| } |
| |
| // In Mesos 1.2, the `Environment.variables.value` message was made |
| // optional. The default type for `Environment.variables.type` is now VALUE, |
| // which requires `value` to be set, maintaining backward compatibility. |
| // |
| // TODO(greggomann): The default can be removed in Mesos 2.1 (MESOS-7134). |
| optional Type type = 3 [default = VALUE]; |
| |
| // Only one of `value` and `secret` must be set. |
| optional string value = 2; |
| optional Secret secret = 4; |
| } |
| |
| // The environment variables in this collection. |
| repeated Variable variables = 1; |
| } |
| |
| |
| /** |
| * A generic (key, value) pair used in various places for parameters. |
| */ |
| message Parameter { |
| // Both halves of the pair are required. |
| required string key = 1; |
| required string value = 2; |
| } |
| |
| |
| /** |
| * Collection of Parameter. |
| */ |
| message Parameters { |
| // The (key, value) pairs in this collection. |
| repeated Parameter parameter = 1; |
| } |
| |
| |
| /** |
| * Credential used in various places for authentication and |
| * authorization. |
| * |
| * NOTE: A 'principal' is different from 'FrameworkInfo.user'. The |
| * former is used for authentication and authorization while the |
| * latter is used to determine the default user under which the |
| * framework's executors/tasks are run. |
| */ |
| message Credential { |
| // The principal used for authentication and authorization. For HTTP |
| // authentication this carries the 'username' (see `Credentials`). |
| required string principal = 1; |
| // Optional secret for the principal. For HTTP authentication this |
| // carries the 'password' (see `Credentials`). |
| optional string secret = 2; |
| } |
| |
| |
| /** |
| * Credentials used for framework authentication, HTTP authentication |
| * (where the common 'username' and 'password' are captured as |
| * 'principal' and 'secret' respectively), etc. |
| */ |
| message Credentials { |
| // The credentials in this collection. |
| repeated Credential credentials = 1; |
| } |
| |
| |
| /** |
| * Secret used to pass privileged information. It is designed to provide |
| * pass-by-value or pass-by-reference semantics, where the REFERENCE type can be |
| * used by custom modules which interact with a secure back-end. |
| */ |
| message Secret { |
| // The kind of secret: pass-by-reference (REFERENCE) or pass-by-value |
| // (VALUE). |
| enum Type { |
| UNKNOWN = 0; |
| REFERENCE = 1; |
| VALUE = 2; |
| } |
| |
| // A pointer to a secret held in a secure back-end, for use by modules. |
| // `name` identifies the secret; the optional `key` permits reference to |
| // a single value when the secret contains arbitrary key-value pairs. |
| // |
| // Example: suppose the back-end secret store holds a secret named |
| // "my-secret" with the following key-value pairs: |
| // |
| // { |
| // "username": "my-user", |
| // "password": "my-password" |
| // } |
| // |
| // A `Secret` then refers to the username by specifying "my-secret" |
| // for the `name` and "username" for the `key`. |
| message Reference { |
| required string name = 1; |
| optional string key = 2; |
| } |
| |
| // Carries the value of a secret directly. |
| message Value { |
| required bytes data = 1; |
| } |
| |
| optional Type type = 1; |
| |
| // Only one of `reference` and `value` must be set. |
| optional Reference reference = 2; |
| optional Value value = 3; |
| } |
| |
| |
| /** |
| * Rate (queries per second, QPS) limit for messages from a framework to master. |
| * Strictly speaking they are the combined rate from all frameworks of the same |
| * principal. |
| */ |
| message RateLimit { |
| // Rate limit in queries per second. |
| // Leaving QPS unset gives it unlimited rate (i.e., not throttled), |
| // which also implies unlimited capacity. |
| optional double qps = 1; |
| |
| // Principal of framework(s) to be throttled. Should match |
| // FrameworkInfo.principal and Credential.principal (if using authentication). |
| required string principal = 2; |
| |
| // Max number of outstanding messages from frameworks of this principal |
| // allowed by master before the next message is dropped and an error is sent |
| // back to the sender. Messages received before the capacity is reached are |
| // still going to be processed after the error is sent. |
| // If unspecified, this principal is assigned unlimited capacity. |
| // NOTE: This value is ignored if 'qps' is not set. |
| optional uint64 capacity = 3; |
| } |
| |
| |
| /** |
| * Collection of RateLimit. |
| * Frameworks without rate limits defined here are not throttled unless |
| * 'aggregate_default_qps' is specified. |
| */ |
| message RateLimits { |
| // Per-principal rate limits. Items should have unique principals. |
| repeated RateLimit limits = 1; |
| |
| // All the frameworks not specified in 'limits' get this default rate. |
| // This rate is an aggregate rate for all of them, i.e., their combined |
| // traffic is throttled together at this rate. |
| optional double aggregate_default_qps = 2; |
| |
| // All the frameworks not specified in 'limits' get this default capacity. |
| // This is an aggregate value similar to 'aggregate_default_qps'. |
| optional uint64 aggregate_default_capacity = 3; |
| } |
| |
| |
| /** |
| * Describes an image used by tasks or executors. Note that it's only |
| * for tasks or executors launched by MesosContainerizer currently. |
| */ |
| message Image { |
| // The image format. It determines which of the `appc` / `docker` |
| // messages below should be set. |
| enum Type { |
| APPC = 1; |
| DOCKER = 2; |
| } |
| |
| // Protobuf for specifying an Appc container image. See: |
| // https://github.com/appc/spec/blob/master/spec/aci.md |
| message Appc { |
| // The name of the image. |
| required string name = 1; |
| |
| // An image ID is a string of the format "hash-value", where |
| // "hash" is the hash algorithm used and "value" is the hex |
| // encoded string of the digest. Currently the only permitted |
| // hash algorithm is sha512. |
| optional string id = 2; |
| |
| // Optional labels. Suggested labels: "version", "os", and "arch". |
| optional Labels labels = 3; |
| } |
| |
| // Protobuf for specifying a Docker container image. |
| message Docker { |
| // The name of the image. Expected format: |
| // [REGISTRY_HOST[:REGISTRY_PORT]/]REPOSITORY[:TAG|@TYPE:DIGEST] |
| // |
| // See: https://docs.docker.com/reference/commandline/pull/ |
| required string name = 1; |
| |
| // Credential to authenticate with docker registry. |
| // NOTE: This is not encrypted, therefore framework and operators |
| // should enable SSL when passing this information. |
| // |
| // This field has never been used in Mesos before and is |
| // deprecated since Mesos 1.3. Please use `config` below |
| // (see MESOS-7088 for details). |
| optional Credential credential = 2 [deprecated = true]; // Since 1.3. |
| |
| // Docker config containing credentials to authenticate with |
| // docker registry. The secret is expected to be a docker |
| // config file in JSON format with UTF-8 character encoding. |
| optional Secret config = 3; |
| } |
| |
| // The format of this image (required). |
| required Type type = 1; |
| |
| // Only one of the following image messages should be set to match |
| // the type. |
| optional Appc appc = 2; |
| optional Docker docker = 3; |
| |
| // With this flag set to false, the mesos containerizer will pull |
| // the docker/appc image from the registry even if the image is |
| // already downloaded on the agent. Defaults to true. |
| optional bool cached = 4 [default = true]; |
| } |
| |
| |
| /** |
| * Describes how the mount will be propagated for a volume. See the |
| * following doc for more details about mount propagation: |
| * https://www.kernel.org/doc/Documentation/filesystems/sharedsubtree.txt |
| */ |
| message MountPropagation { |
| // The available mount propagation modes. |
| enum Mode { |
| UNKNOWN = 0; |
| |
| // The volume in a container will receive new mounts from the host |
| // or other containers, but filesystems mounted inside the |
| // container won't be propagated to the host or other containers. |
| // This is currently the default behavior for all volumes. |
| HOST_TO_CONTAINER = 1; |
| |
| // The volume in a container will receive new mounts from the host |
| // or other containers, and its own mounts will be propagated from |
| // the container to the host or other containers. |
| BIDIRECTIONAL = 2; |
| } |
| |
| // The propagation mode to use. |
| // NOTE(review): presumably an unset mode yields the HOST_TO_CONTAINER |
| // behavior, which is documented above as the current default -- confirm. |
| optional Mode mode = 1; |
| } |
| |
| |
| /** |
| * Describes a volume mapping either from host to container or vice |
| * versa. Both paths can either refer to a directory or a file. |
| */ |
| message Volume { |
| // Whether the volume is read-write or read-only. |
| enum Mode { |
| RW = 1; // read-write. |
| RO = 2; // read-only. |
| } |
| |
| // TODO(gyliu513): Make this as `optional` after deprecation cycle of 1.0. |
| required Mode mode = 3; |
| |
| // Path pointing to a directory or file in the container. If the path |
| // is a relative path, it is relative to the container work directory. |
| // If the path is an absolute path and the container does not have its |
| // own rootfs, that path must already exist in the agent host rootfs. |
| required string container_path = 1; |
| |
| // The following specifies the source of this volume. At most one of |
| // the following should be set. |
| |
| // Absolute path pointing to a directory or file on the host or a |
| // path relative to the container work directory. |
| optional string host_path = 2; |
| |
| // The source of the volume is an Image which describes a root |
| // filesystem which will be provisioned by Mesos. |
| optional Image image = 4; |
| |
| // Describes where a volume originates from. |
| message Source { |
| // The kind of source. Each value other than UNKNOWN has a like-named |
| // field in `Source` below (e.g. DOCKER_VOLUME and `docker_volume`). |
| enum Type { |
| // This must be the first enum value in this list, to |
| // ensure that if 'type' is not set, the default value |
| // is UNKNOWN. This enables enum values to be added |
| // in a backwards-compatible way. See: MESOS-4997. |
| UNKNOWN = 0; |
| |
| // TODO(gyliu513): Add IMAGE as volume source type. |
| DOCKER_VOLUME = 1; |
| HOST_PATH = 4; |
| SANDBOX_PATH = 2; |
| SECRET = 3; |
| CSI_VOLUME = 5; |
| } |
| |
| // A volume created by a docker volume driver. |
| message DockerVolume { |
| // Driver of the volume, it can be flocker, convoy, rexray etc. |
| optional string driver = 1; |
| |
| // Name of the volume. |
| required string name = 2; |
| |
| // Volume driver specific options. |
| optional Parameters driver_options = 3; |
| } |
| |
| // Absolute path pointing to a directory or file on the host. |
| message HostPath { |
| required string path = 1; |
| // How mounts are propagated for this volume (see `MountPropagation`). |
| optional MountPropagation mount_propagation = 2; |
| } |
| |
| // Describe a path from a container's sandbox. The container can |
| // be the current container (SELF), or its parent container |
| // (PARENT). PARENT allows all child containers to share a volume |
| // from their parent container's sandbox. It'll be an error if |
| // the current container is a top level container. |
| message SandboxPath { |
| // Which container's sandbox `path` is relative to. |
| enum Type { |
| UNKNOWN = 0; |
| SELF = 1; |
| PARENT = 2; |
| } |
| |
| optional Type type = 1; |
| |
| // A path relative to the corresponding container's sandbox. |
| // Note that upwards traversal (i.e. ../../abc) is not allowed. |
| required string path = 2; |
| } |
| |
| // A volume which will be handled by the `volume/csi` isolator. |
| message CSIVolume { |
| // The name of the CSI plugin. |
| required string plugin_name = 1; |
| |
| // Specifies a capability of a volume. |
| // https://github.com/container-storage-interface/spec/blob/v1.3.0/csi.proto#L379:L438 |
| message VolumeCapability { |
| // Indicates that the volume will be accessed via the block device API. |
| message BlockVolume { |
| // Intentionally empty, for now. |
| } |
| |
| // Indicates that the volume will be accessed via the filesystem API. |
| message MountVolume { |
| // The filesystem type. An empty string is equal to an unspecified |
| // field value. |
| optional string fs_type = 1; |
| |
| // The mount options that can be used for the volume. This field is |
| // OPTIONAL. `mount_flags` MAY contain sensitive information. |
| // Therefore, Mesos and the Plugin MUST NOT leak this information |
| // to untrusted entities. The total size of this repeated field |
| // SHALL NOT exceed 4 KiB. |
| repeated string mount_flags = 2; |
| } |
| |
| // Specifies how a volume can be accessed. |
| message AccessMode { |
| enum Mode { |
| UNKNOWN = 0; |
| |
| // Can only be published once as read/write on a single node, at |
| // any given time. |
| SINGLE_NODE_WRITER = 1; |
| |
| // Can only be published once as readonly on a single node, at |
| // any given time. |
| SINGLE_NODE_READER_ONLY = 2; |
| |
| // Can be published as readonly at multiple nodes simultaneously. |
| MULTI_NODE_READER_ONLY = 3; |
| |
| // Can be published at multiple nodes simultaneously. Only one of |
| // the node can be used as read/write. The rest will be readonly. |
| MULTI_NODE_SINGLE_WRITER = 4; |
| |
| // Can be published as read/write at multiple nodes |
| // simultaneously. |
| MULTI_NODE_MULTI_WRITER = 5; |
| } |
| |
| // The access mode of the volume (required). |
| required Mode mode = 1; |
| } |
| |
| // Specifies what API the volume will be accessed using. One of the |
| // following fields MUST be specified. |
| oneof access_type { |
| BlockVolume block = 1; |
| MountVolume mount = 2; |
| } |
| |
| // How the volume can be accessed (see `AccessMode`). |
| required AccessMode access_mode = 3; |
| } |
| |
| // Specifies the parameters used to stage/publish a pre-provisioned volume |
| // on an agent host. The fields are merged from `NodeStageVolumeRequest` |
| // and `NodePublishVolumeRequest` protobuf messages defined in CSI spec |
| // except two fields `staging_target_path` and `target_path` which will be |
| // internally determined by Mesos when staging/publishing the volume. |
| message StaticProvisioning { |
| required string volume_id = 1; |
| required VolumeCapability volume_capability = 2; |
| |
| // The secrets needed for staging/publishing the volume, e.g.: |
| // { |
| // "username": {"type": REFERENCE, "reference": {"name": "U_SECRET"}}, |
| // "password": {"type": REFERENCE, "reference": {"name": "P_SECRET"}} |
| // } |
| map<string, Secret> node_stage_secrets = 3; |
| map<string, Secret> node_publish_secrets = 4; |
| // NOTE(review): presumably the `volume_context` of the CSI requests |
| // named above, passed through to the plugin -- confirm. |
| map<string, string> volume_context = 5; |
| } |
| |
| // Parameters for staging/publishing a pre-provisioned volume. |
| optional StaticProvisioning static_provisioning = 2; |
| } |
| |
| // Enum fields should be optional, see: MESOS-4997. |
| optional Type type = 1; |
| |
| // The following specifies the source of this volume. At most one of |
| // the following should be set. |
| |
| // The source of the volume created by docker volume driver. |
| optional DockerVolume docker_volume = 2; |
| |
| optional HostPath host_path = 5; |
| optional SandboxPath sandbox_path = 3; |
| |
| // The volume/secret isolator uses the secret-fetcher module (third-party |
| // or internal) to download the secret and make it available at |
| // container_path. |
| optional Secret secret = 4; |
| |
| optional CSIVolume csi_volume = 6; |
| } |
| |
| // Where this volume originates from (see `Source`). |
| optional Source source = 5; |
| } |
| |
| |
| /** |
| * Describes a network request from a framework as well as network resolution |
| * provided by Mesos. |
| * |
| * A framework may request the network isolator on the Agent to isolate the |
| * container in a network namespace and create a virtual network interface. |
| * The `NetworkInfo` message describes the properties of that virtual |
| * interface, including the IP addresses and network isolation policy |
| * (network group membership). |
| * |
| * The NetworkInfo message is not interpreted by the Master or Agent and is |
| * intended to be used by Agent and Master modules implementing network |
| * isolation. If the modules are missing, the message is simply ignored. In |
| * future, the task launch will fail if there is no module providing the |
| * network isolation capabilities (MESOS-3390). |
| * |
| * An executor, Agent, or an Agent module may append NetworkInfos inside |
| * TaskStatus::container_status to provide information such as the container IP |
| * address and isolation groups. |
| */ |
| message NetworkInfo { |
| // IP protocol version of an address. |
| enum Protocol { |
| IPv4 = 1; |
| IPv6 = 2; |
| } |
| |
| // Specifies a request for an IP address, or reports the assigned container |
| // IP address. |
| // |
| // Users can request an automatically assigned IP (for example, via an |
| // IPAM service) or a specific IP by adding a NetworkInfo to the |
| // ContainerInfo for a task. On a request, specifying neither `protocol` |
| // nor `ip_address` means that any available address may be assigned. |
| message IPAddress { |
| // Specify IP address requirement. Set protocol to the desired value to |
| // request the network isolator on the Agent to assign an IP address to the |
| // container being launched. If a specific IP address is specified in |
| // ip_address, this field should not be set. |
| optional Protocol protocol = 1 [default = IPv4]; |
| |
| // Statically assigned IP provided by the Framework. This IP will be |
| // assigned to the container by the network isolator module on the Agent. |
| // This field should not be used with the protocol field above. |
| // |
| // If an explicit address is requested but is unavailable, the network |
| // isolator should fail the task. |
| optional string ip_address = 2; |
| } |
| |
| // When included in a ContainerInfo, each of these represent a |
| // request for an IP address. Each request can specify an explicit address |
| // or the IP protocol to use. |
| // |
| // When included in a TaskStatus message, these inform the framework |
| // scheduler about the IP addresses that are bound to the container |
| // interface. When there are no custom network isolator modules installed, |
| // this field is filled in automatically with the Agent IP address. |
| repeated IPAddress ip_addresses = 5; |
| |
| // Name of the network which will be used by network isolator to determine |
| // the network that the container joins. It's up to the network isolator |
| // to decide how to interpret this field. |
| optional string name = 6; |
| |
| // A group is the name given to a set of logically-related interfaces that |
| // are allowed to communicate among themselves. Network traffic is allowed |
| // between two container interfaces that share at least one network group. |
| // For example, one might want to create separate groups for isolating dev, |
| // testing, qa and prod deployment environments. |
| repeated string groups = 3; |
| |
| // To tag certain metadata to be used by Isolator/IPAM, e.g., rack, etc. |
| optional Labels labels = 4; |
| |
| // Specifies a port mapping request for the task on this network. |
| message PortMapping { |
| // Port on the host and the container port it is mapped to. |
| required uint32 host_port = 1; |
| required uint32 container_port = 2; |
| // Protocol to expose as (ie: tcp, udp). |
| optional string protocol = 3; |
| } |
| |
| // The port mappings requested for the task on this network. |
| repeated PortMapping port_mappings = 7; |
| } |
| |
| |
| /** |
| * Encapsulation of `Capabilities` supported by Linux. |
| * Reference: http://linux.die.net/man/7/capabilities. |
| */ |
| message CapabilityInfo { |
| // We start the actual values at an offset(1000) because Protobuf 2 |
| // uses the first value as the default one. Separating the default |
| // value from the real first value helps to disambiguate them. This |
| // is especially valuable for backward compatibility. |
| // See: MESOS-4997. |
| enum Capability { |
| UNKNOWN = 0; |
| CHOWN = 1000; |
| DAC_OVERRIDE = 1001; |
| DAC_READ_SEARCH = 1002; |
| FOWNER = 1003; |
| FSETID = 1004; |
| KILL = 1005; |
| SETGID = 1006; |
| SETUID = 1007; |
| SETPCAP = 1008; |
| LINUX_IMMUTABLE = 1009; |
| NET_BIND_SERVICE = 1010; |
| NET_BROADCAST = 1011; |
| NET_ADMIN = 1012; |
| NET_RAW = 1013; |
| IPC_LOCK = 1014; |
| IPC_OWNER = 1015; |
| SYS_MODULE = 1016; |
| SYS_RAWIO = 1017; |
| SYS_CHROOT = 1018; |
| SYS_PTRACE = 1019; |
| SYS_PACCT = 1020; |
| SYS_ADMIN = 1021; |
| SYS_BOOT = 1022; |
| SYS_NICE = 1023; |
| SYS_RESOURCE = 1024; |
| SYS_TIME = 1025; |
| SYS_TTY_CONFIG = 1026; |
| MKNOD = 1027; |
| LEASE = 1028; |
| AUDIT_WRITE = 1029; |
| AUDIT_CONTROL = 1030; |
| SETFCAP = 1031; |
| MAC_OVERRIDE = 1032; |
| MAC_ADMIN = 1033; |
| SYSLOG = 1034; |
| WAKE_ALARM = 1035; |
| BLOCK_SUSPEND = 1036; |
| AUDIT_READ = 1037; |
| } |
| |
| // The capabilities contained in this set. |
| repeated Capability capabilities = 1; |
| } |
| |
| |
| /** |
| * Encapsulation for Seccomp configuration, which is Linux specific. |
| * |
| * The fields below describe two mutually exclusive modes: either |
| * `unconfined` is `true` (Seccomp is not applied and `profile_name` must |
| * be unset), or `profile_name` names the profile the container is |
| * launched with. |
| */ |
| message SeccompInfo { |
| // A filename of the Seccomp profile. This should be a path |
| // relative to the directory containing Seccomp profiles, |
| // which is specified on the agent via the `--seccomp_config_dir` flag. |
| optional string profile_name = 1; |
| |
| // If set to `true`, Seccomp is not applied to the container. |
| // If not set or set to `false`, the container is launched with |
| // the profile specified in the `profile_name` field. |
| // |
| // NOTE: `profile_name` must not be specified if `unconfined` is set to `true`. |
| // `profile_name` must be specified if `unconfined` is not set or |
| // is set to `false`. |
| optional bool unconfined = 2; |
| } |
| |
| |
| /** |
| * Encapsulation for Linux specific configuration. |
| * E.g., capabilities, limits, etc. |
| * |
| * All fields are optional. They cover: |
| * - capabilities (`capability_info`, deprecated; `bounding_capabilities`; |
| * `effective_capabilities`), |
| * - PID namespace sharing (`share_pid_namespace`), |
| * - Seccomp syscall filtering (`seccomp`), |
| * - IPC namespace and /dev/shm handling (`ipc_mode`, `shm_size`), |
| * - cgroups sharing (`share_cgroups`, which defaults to `true`). |
| */ |
| message LinuxInfo { |
| // Since 1.4.0, deprecated in favor of `effective_capabilities`. |
| optional CapabilityInfo capability_info = 1 [deprecated = true]; |
| |
| // The set of capabilities that are allowed but not initially |
| // granted to tasks. |
| optional CapabilityInfo bounding_capabilities = 2; |
| |
| // Represents the set of capabilities that the task will |
| // be executed with. |
| optional CapabilityInfo effective_capabilities = 3; |
| |
| // If set as 'true', the container shares the pid namespace with |
| // its parent. If the container is a top level container, it will |
| // share the pid namespace with the agent. If the container is a |
| // nested container, it will share the pid namespace with its |
| // parent container. This field will be ignored if 'namespaces/pid' |
| // isolator is not enabled. |
| optional bool share_pid_namespace = 4; |
| |
| // Represents Seccomp configuration, which is used for syscall filtering. |
| // This field is used to override the agent's default Seccomp configuration. |
| optional SeccompInfo seccomp = 5; |
| |
| enum IpcMode { |
| UNKNOWN = 0; |
| |
| // The container will have its own IPC namespace and /dev/shm, with a |
| // possibility to share them with its child containers. |
| PRIVATE = 1; |
| |
| // The container will share the IPC namespace and /dev/shm from its |
| // parent. If the container is a top level container, it will share |
| // the IPC namespace and /dev/shm from the agent host, if the container |
| // is a nested container, it will share the IPC namespace and /dev/shm |
| // from its parent container. The implication is if a nested container |
| // wants to share the IPC namespace and /dev/shm from the agent host, |
| // its parent container has to do it first. |
| SHARE_PARENT = 2; |
| } |
| |
| // There are two special cases that we need to handle for this field: |
| // 1. This field is not set: For backward compatibility we will keep the |
| // previous behavior: Top level container will have its own IPC namespace |
| // and nested container will share the IPC namespace from its parent |
| // container. If the container does not have its own rootfs, it will share |
| // agent's /dev/shm, otherwise it will have its own /dev/shm. |
| // 2. The `namespaces/ipc` isolator is not enabled: This field will be ignored |
| // in this case. For backward compatibility, in the `filesystem/linux` |
| // isolator we will keep the previous behavior: Any containers will share |
| // IPC namespace from agent, and if the container does not have its own |
| // rootfs, it will also share agent's /dev/shm, otherwise it will have its |
| // own /dev/shm. |
| // |
| // TODO(qianzhang): Remove the support for the above two cases after the |
| // deprecation cycle (started in 1.9). Eventually we want a single isolator |
| // (`namespaces/ipc`) to handle both IPC namespace and /dev/shm, and decouple |
| // /dev/shm from container's rootfs (i.e., whether a container will have its |
| // own /dev/shm depends on its `ipc_mode` instead of whether the container |
| // has its own rootfs). |
| optional IpcMode ipc_mode = 6; |
| |
| // Size of /dev/shm in MB. If not set, the size of the container's /dev/shm |
| // will be the value of the `--default_container_shm_size` agent flag; if that |
| // flag is not set either, the size of /dev/shm will be half of the host RAM, |
| // which is the default behavior of Linux. This field will be ignored for a |
| // container which shares /dev/shm from its parent, and it will also be ignored |
| // for any container if the `namespaces/ipc` isolator is not enabled. Please |
| // note that we only support setting this field when the `ipc_mode` field is |
| // set to `PRIVATE`; otherwise the container launch will be rejected. |
| optional uint32 shm_size = 7; |
| |
| // If set as 'true', the container will share the cgroups from its parent |
| // container, otherwise it will have its own cgroups created. Please note: |
| // 1. For tasks in a task group launched via the LAUNCH_GROUP operation, |
| // this field may be set to 'true' or 'false'. Resource limits may only be |
| // set for tasks in a task group when this field is set to 'false'. |
| // 2. For tasks launched via the LAUNCH operation, this field may only be set |
| // to 'true', and in this case resource limits may be set on these tasks. |
| // 3. For containers launched via the agent's LAUNCH_NESTED_CONTAINER_SESSION |
| // call, this field must be set to 'true'. |
| // 4. For executor containers, this field may only be set to 'false'. |
| // 5. All tasks under a single executor must share the same value of this |
| // field, if it is set. Note that this means that all tasks within a single |
| // task group must set this field to the same value. |
| optional bool share_cgroups = 8 [default = true]; |
| } |
| |
| |
| /** |
| * Encapsulation for POSIX rlimits, see |
| * http://pubs.opengroup.org/onlinepubs/009695399/functions/getrlimit.html. |
| * Note that some types might only be defined for Linux. |
| * We use a custom prefix to avoid conflict with existing system macros |
| * (e.g., `RLIMIT_CPU` or `NOFILE`). |
| * |
| * Each `RLimit` entry pairs a resource `Type` (the `RLMT_` prefixed names |
| * below) with optional `hard` and `soft` limits; the repeated `rlimits` |
| * field holds the list of such entries. `Type.UNKNOWN` (0) is the first |
| * enum value and does not name a resource. |
| */ |
| message RLimitInfo { |
| message RLimit { |
| enum Type { |
| UNKNOWN = 0; |
| RLMT_AS = 1; |
| RLMT_CORE = 2; |
| RLMT_CPU = 3; |
| RLMT_DATA = 4; |
| RLMT_FSIZE = 5; |
| RLMT_LOCKS = 6; |
| RLMT_MEMLOCK = 7; |
| RLMT_MSGQUEUE = 8; |
| RLMT_NICE = 9; |
| RLMT_NOFILE = 10; |
| RLMT_NPROC = 11; |
| RLMT_RSS = 12; |
| RLMT_RTPRIO = 13; |
| RLMT_RTTIME = 14; |
| RLMT_SIGPENDING = 15; |
| RLMT_STACK = 16; |
| } |
| optional Type type = 1; |
| |
| // Either both are set or both are not set. |
| // If both are not set, it represents unlimited. |
| // If both are set, we require `soft` <= `hard`. |
| optional uint64 hard = 2; |
| optional uint64 soft = 3; |
| } |
| |
| repeated RLimit rlimits = 1; |
| } |
| |
| |
| /** |
| * Describes the information about a (pseudo) TTY that can |
| * be attached to a process running in a container. |
| * |
| * The optional `window_size` carries the terminal dimensions; when it is |
| * set, both `rows` and `columns` are required. |
| */ |
| message TTYInfo { |
| message WindowSize { |
| required uint32 rows = 1; |
| required uint32 columns = 2; |
| } |
| |
| optional WindowSize window_size = 1; |
| } |
| |
| |
| /** |
| * Describes a container configuration and allows extensible |
| * configurations for different container implementations. |
| * |
| * NOTE: `ContainerInfo` may be specified, e.g., by a task, even if no |
| * container image is provided. In this case neither `MesosInfo` nor |
| * `DockerInfo` is set, and the required `type` must be `MESOS`. This is |
| * to address a case when a task without an image, e.g., a shell script |
| * with URIs, wants to use features originally designed for containers, |
| * for example custom network isolation via `NetworkInfo`. |
| * |
| * `type` is the only required field. The implementation-specific |
| * settings live in the optional `docker` / `mesos` fields, of which at |
| * most one should be set and it should match `type`. |
| */ |
| message ContainerInfo { |
| // All container implementation types. |
| // For each type there should be a field in the ContainerInfo itself |
| // with an exactly matching name in lowercase. |
| enum Type { |
| DOCKER = 1; |
| MESOS = 2; |
| } |
| |
| message DockerInfo { |
| // The docker image that is going to be passed to the registry. |
| required string image = 1; |
| |
| // Network options. |
| enum Network { |
| HOST = 1; |
| BRIDGE = 2; |
| NONE = 3; |
| USER = 4; |
| } |
| |
| optional Network network = 2 [default = HOST]; |
| |
| message PortMapping { |
| required uint32 host_port = 1; |
| required uint32 container_port = 2; |
| // Protocol to expose as (i.e., tcp, udp). |
| optional string protocol = 3; |
| } |
| |
| repeated PortMapping port_mappings = 3; |
| |
| optional bool privileged = 4 [default = false]; |
| |
| // Allowing arbitrary parameters to be passed to docker CLI. |
| // Note that anything passed to this field is not guaranteed |
| // to be supported moving forward, as we might move away from |
| // the docker CLI. |
| repeated Parameter parameters = 5; |
| |
| // With this flag set to true, the docker containerizer will |
| // pull the docker image from the registry even if the image |
| // is already downloaded on the agent. |
| optional bool force_pull_image = 6; |
| |
| // The name of the volume driver plugin. |
| optional string volume_driver = 7 [deprecated = true]; // Since 1.0 |
| } |
| |
| message MesosInfo { |
| optional Image image = 1; |
| } |
| |
| required Type type = 1; |
| repeated Volume volumes = 2; |
| optional string hostname = 4; |
| |
| // At most one of the following *Info messages should be set to match |
| // the type, i.e. the "protobuf union" in ContainerInfo should be valid. |
| optional DockerInfo docker = 3; |
| optional MesosInfo mesos = 5; |
| |
| // A list of network requests. A framework can request multiple IP addresses |
| // for the container. |
| repeated NetworkInfo network_infos = 7; |
| |
| // Linux specific information for the container. |
| optional LinuxInfo linux_info = 8; |
| |
| // (POSIX only) rlimits of the container. |
| optional RLimitInfo rlimit_info = 9; |
| |
| // If specified a tty will be attached to the container entrypoint. |
| optional TTYInfo tty_info = 10; |
| } |
| |
| |
| /** |
| * Container-related information that is resolved during container |
| * setup. The information is sent back to the framework as part of the |
| * TaskStatus message. |
| * |
| * No field is required. Note that the field numbers do not follow the |
| * declaration order (`container_id` is declared first but uses tag 4). |
| */ |
| message ContainerStatus { |
| optional ContainerID container_id = 4; |
| |
| // This field can be reliably used to identify the container IP address. |
| repeated NetworkInfo network_infos = 1; |
| |
| // Information about Linux control group (cgroup). |
| optional CgroupInfo cgroup_info = 2; |
| |
| // Information about Executor PID. |
| optional uint32 executor_pid = 3; |
| } |
| |
| |
| /** |
| * Linux control group (cgroup) information. |
| * |
| * The only field of `CgroupInfo` itself is the optional `net_cls`. The |
| * nested `Blkio` message declares no fields of its own; it only scopes |
| * the blkio types (`Operation`, `Value`, and the `CFQ`, `Throttling` and |
| * `Statistics` messages). NOTE(review): those types are presumably |
| * referenced from elsewhere in the file; the referencing sites are not |
| * visible in this section. |
| */ |
| message CgroupInfo { |
| // Configuration of a blkio cgroup subsystem. |
| message Blkio { |
| enum Operation { |
| UNKNOWN = 0; |
| TOTAL = 1; |
| READ = 2; |
| WRITE = 3; |
| SYNC = 4; |
| ASYNC = 5; |
| DISCARD = 6; |
| } |
| |
| // Describes a stat value without the device descriptor part. |
| message Value { |
| optional Operation op = 1; // Required. |
| optional uint64 value = 2; // Required. |
| } |
| |
| message CFQ { |
| message Statistics { |
| // Stats are grouped by block devices. If `device` is not |
| // set, it represents `Total`. |
| optional Device.Number device = 1; |
| // blkio.sectors |
| optional uint64 sectors = 2; |
| // blkio.time |
| optional uint64 time = 3; |
| // blkio.io_serviced |
| repeated Value io_serviced = 4; |
| // blkio.io_service_bytes |
| repeated Value io_service_bytes = 5; |
| // blkio.io_service_time |
| repeated Value io_service_time = 6; |
| // blkio.io_wait_time |
| repeated Value io_wait_time = 7; |
| // blkio.io_merged |
| repeated Value io_merged = 8; |
| // blkio.io_queued |
| repeated Value io_queued = 9; |
| } |
| |
| // TODO(jasonlai): Add fields for blkio weight and weight |
| // device. |
| } |
| |
| message Throttling { |
| message Statistics { |
| // Stats are grouped by block devices. If `device` is not |
| // set, it represents `Total`. |
| optional Device.Number device = 1; |
| // blkio.throttle.io_serviced |
| repeated Value io_serviced = 2; |
| // blkio.throttle.io_service_bytes |
| repeated Value io_service_bytes = 3; |
| } |
| |
| // TODO(jasonlai): Add fields for blkio.throttle.*_device. |
| } |
| |
| message Statistics { |
| repeated CFQ.Statistics cfq = 1; |
| repeated CFQ.Statistics cfq_recursive = 2; |
| repeated Throttling.Statistics throttling = 3; |
| } |
| } |
| |
| // Configuration of a net_cls cgroup subsystem. |
| message NetCls { |
| // The 32-bit classid consists of two parts, a 16-bit major handle |
| // and a 16-bit minor handle. The major and minor handle are |
| // represented using the format 0xAAAABBBB, where 0xAAAA is the |
| // 16-bit major handle and 0xBBBB is the 16-bit minor handle. |
| optional uint32 classid = 1; |
| } |
| |
| optional NetCls net_cls = 1; |
| } |
| |
| |
| /** |
| * Collection of labels. Labels should not contain duplicate key-value |
| * pairs. |
| * |
| * This is a thin wrapper around a repeated list of `Label` entries. |
| */ |
| message Labels { |
| repeated Label labels = 1; |
| } |
| |
| |
| /** |
| * Key-value pair used to store free-form user data. |
| * |
| * The `key` is required; the `value` is optional, so a label may consist |
| * of a key alone. |
| */ |
| message Label { |
| required string key = 1; |
| optional string value = 2; |
| } |
| |
| |
| /** |
| * Named port used for service discovery. |
| * |
| * Only `number` is required. When `visibility` is set on a port, it |
| * overrides the general `DiscoveryInfo.visibility` for that port. |
| */ |
| message Port { |
| // Port number on which the framework exposes a service. |
| required uint32 number = 1; |
| |
| // Name of the service hosted on this port. |
| optional string name = 2; |
| |
| // Layer 4-7 protocol on which the framework exposes its services. |
| optional string protocol = 3; |
| |
| // This field restricts discovery within a framework (FRAMEWORK), |
| // within a Mesos cluster (CLUSTER), or places no restrictions (EXTERNAL). |
| // The visibility setting for a Port overrides the general visibility setting |
| // in the DiscoveryInfo. |
| optional DiscoveryInfo.Visibility visibility = 4; |
| |
| // This can be used to decorate the message with metadata to be |
| // interpreted by external applications such as firewalls. |
| optional Labels labels = 5; |
| } |
| |
| |
| /** |
| * Collection of ports. |
| * |
| * This is a thin wrapper around a repeated list of `Port` entries. |
| */ |
| message Ports { |
| repeated Port ports = 1; |
| } |
| |
| |
| /** |
| * Service discovery information. |
| * The visibility field restricts discovery within a framework (FRAMEWORK), |
| * within a Mesos cluster (CLUSTER), or places no restrictions (EXTERNAL). |
| * Each port in the ports field also has an optional visibility field. |
| * If visibility is specified for a port, it overrides the default service-wide |
| * DiscoveryInfo.visibility for that port. |
| * The environment, location, and version fields provide first class support for |
| * common attributes used to differentiate between similar services. The |
| * environment may receive values such as PROD/QA/DEV, the location field may |
| * receive values like EAST-US/WEST-US/EUROPE/AMEA, and the version field may |
| * receive values like v2.0/v0.9. The exact use of these fields is up to each |
| * service discovery system. |
| * |
| * Only `visibility` is required; every other field is optional. Note that |
| * `FRAMEWORK` is the zero value of the `Visibility` enum. |
| */ |
| message DiscoveryInfo { |
| enum Visibility { |
| FRAMEWORK = 0; |
| CLUSTER = 1; |
| EXTERNAL = 2; |
| } |
| |
| required Visibility visibility = 1; |
| optional string name = 2; |
| optional string environment = 3; |
| optional string location = 4; |
| optional string version = 5; |
| optional Ports ports = 6; |
| optional Labels labels = 7; |
| } |
| |
| |
| /** |
| * Named WeightInfo to indicate resource allocation |
| * priority between the different roles. |
| * |
| * `weight` is required; `role` optionally names the role that the |
| * weight relates to. |
| */ |
| message WeightInfo { |
| required double weight = 1; |
| |
| // Related role name. |
| optional string role = 2; |
| } |
| |
| |
| /** |
| * Version information of a component. |
| * |
| * Only `version` is required; the remaining fields carry optional build |
| * and git metadata. NOTE(review): the unit and epoch of `build_time` (a |
| * double) and the format of `build_date` are not stated here; confirm |
| * against the code that populates them. |
| */ |
| message VersionInfo { |
| required string version = 1; |
| optional string build_date = 2; |
| optional double build_time = 3; |
| optional string build_user = 4; |
| optional string git_sha = 5; |
| optional string git_branch = 6; |
| optional string git_tag = 7; |
| } |
| |
| |
| /** |
| * Flag consists of a name and optionally its value. |
| * |
| * `name` is required; `value` is an optional string. |
| */ |
| message Flag { |
| required string name = 1; |
| optional string value = 2; |
| } |
| |
| |
| /** |
| * Describes a Role. Roles can be used to specify that certain resources are |
| * reserved for the use of one or more frameworks. |
| * |
| * `name` and `weight` are required; `frameworks` and `resources` are |
| * repeated lists (see the TODO on `resources` regarding its planned |
| * deprecation). |
| */ |
| message Role { |
| required string name = 1; |
| required double weight = 2; |
| repeated FrameworkID frameworks = 3; |
| |
| // TODO(bmahler): Deprecate `resources` and introduce quota, |
| // consumed quota, allocated, offered, and reserved resource |
| // quantity fields. This is blocked by MESOS-9497 since the |
| // computation of these quantities is currently expensive. |
| repeated Resource resources = 4; |
| } |
| |
| |
| /** |
| * Metric consists of a name and optionally its value. |
| * |
| * `name` is required; `value` is an optional double. |
| */ |
| message Metric { |
| required string name = 1; |
| optional double value = 2; |
| } |
| |
| |
| /** |
| * Describes a File. |
| * |
| * Only `path` is required; all other attributes are optional. Note that |
| * `uid` and `gid` are declared as strings rather than numeric IDs. |
| */ |
| message FileInfo { |
| // Absolute path to the file. |
| required string path = 1; |
| |
| // Number of hard links. |
| optional int32 nlink = 2; |
| |
| // Total size in bytes. |
| optional uint64 size = 3; |
| |
| // Last modification time. |
| optional TimeInfo mtime = 4; |
| |
| // Represents a file's mode and permission bits. The bits have the same |
| // definition on all systems and are portable. |
| optional uint32 mode = 5; |
| |
| // User ID of owner. |
| optional string uid = 6; |
| |
| // Group ID of owner. |
| optional string gid = 7; |
| } |
| |
| |
| /** |
| * Describes information about a device. |
| * |
| * Both `path` and `number` are optional. When `number` is set, its |
| * `major_number` and `minor_number` are both required. |
| */ |
| message Device { |
| message Number { |
| required uint64 major_number = 1; |
| required uint64 minor_number = 2; |
| } |
| |
| optional string path = 1; |
| optional Number number = 2; |
| } |
| |
| |
| /** |
| * Describes a device whitelist entry: a device that is exposed from the |
| * host to a container, together with the kinds of access granted on it. |
| * |
| * Both `device` and `access` are required. Each flag inside `Access` |
| * (`read`, `write`, `mknod`) is an optional boolean. |
| */ |
| message DeviceAccess { |
| message Access { |
| optional bool read = 1; |
| optional bool write = 2; |
| optional bool mknod = 3; |
| } |
| required Device device = 1; |
| required Access access = 2; |
| } |
| |
| |
| /** |
| * Collection of `DeviceAccess` entries listing the allowed devices. |
| */ |
| message DeviceWhitelist { |
| repeated DeviceAccess allowed_devices = 1; |
| } |
| |
| |
| /** |
| * The draining state of an agent. `UNKNOWN` (0) is the first value and |
| * does not describe an actual drain state. |
| */ |
| enum DrainState { |
| UNKNOWN = 0; |
| |
| // The agent is currently draining. |
| DRAINING = 1; |
| |
| // The agent has been drained: all tasks have terminated, all terminal |
| // task status updates have been acknowledged by the frameworks, and all |
| // operations have finished and had their terminal updates acknowledged. |
| DRAINED = 2; |
| } |
| |
| |
| /** |
| * Configuration for draining an agent: an optional upper bound on task |
| * kill grace periods (`max_grace_period`) and whether the agent is marked |
| * gone once draining completes (`mark_gone`, default `false`). |
| */ |
| message DrainConfig { |
| // An upper bound for tasks with a KillPolicy. |
| // If a task has a KillPolicy grace period greater than this value, this value |
| // will be used instead. This allows the operator to limit the maximum time it |
| // will take the agent to drain. If this field is unset, the task's KillPolicy |
| // or the executor's default grace period is used. |
| // |
| // NOTE: Grace periods start when the executor receives the associated kill. |
| // If, for example, the agent is unreachable when this call is made, |
| // tasks will still receive their full grace period to kill gracefully. |
| optional DurationInfo max_grace_period = 1; |
| |
| // Whether or not this agent will be removed permanently from the cluster when |
| // draining is complete. This transition is automatic and does **NOT** require |
| // a separate call to `MarkAgentGone`. If this field is unset, then the |
| // default value of `false` is used. |
| // |
| // Compared to `MarkAgentGone`, which is used for unreachable agents, |
| // marking agents gone after draining will respect kill policies. |
| // To notify frameworks, tasks terminated during draining will return |
| // a `TASK_GONE_BY_OPERATOR` status update instead of any other terminal |
| // status. Executors will not need to account for this case, because |
| // the terminal status update will be intercepted and modified by the agent. |
| optional bool mark_gone = 2 [default = false]; |
| } |
| |
| |
| /** |
| * Drain information for an agent: its current `DrainState` together with |
| * the `DrainConfig` used to drain it. Both fields are required. |
| */ |
| message DrainInfo { |
| // The drain state of the agent. |
| required DrainState state = 1; |
| |
| // The configuration used to drain the agent. |
| required DrainConfig config = 2; |
| } |