// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Code generated by make build. DO NOT EDIT

// File preamble: syntax version, package, imports and file-level options.
syntax = "proto3";

package si.v1;

// Provides google.protobuf.FieldOptions, which this file extends.
import "google/protobuf/descriptor.proto";

option go_package = "lib/go/si";

// Custom field option declared as an extension of the standard field options.
extend google.protobuf.FieldOptions {
  // Indicates that a field MAY contain information that is sensitive
  // and MUST be treated as such (e.g. not logged).
  bool si_secret = 1059;
}

// RPCs through which a resource manager (RM) registers with the scheduler
// and exchanges allocation, application and node updates with it.
service Scheduler {
  // Register a RM. If it is a reconnect from a previous RM, the call will
  // trigger a cleanup of all in-memory data and a resync with the RM.
  rpc RegisterResourceManager(RegisterResourceManagerRequest)
      returns (RegisterResourceManagerResponse) {}

  // Update scheduler status (this includes node status updates, allocation
  // request updates, etc.) and receive updates from the scheduler for
  // allocation changes, any required status changes, etc.
  //
  // Update allocation request. Both directions are streams.
  rpc UpdateAllocation(stream AllocationRequest)
      returns (stream AllocationResponse) {}

  // Update application request. Both directions are streams.
  rpc UpdateApplication(stream ApplicationRequest)
      returns (stream ApplicationResponse) {}

  // Update node info. Both directions are streams.
  rpc UpdateNode(stream NodeRequest)
      returns (stream NodeResponse) {}
}

/*
service AdminService {
  // Include
  //   addQueueInfo.
  //   removeQueueInfo.
  //   updateQueueInfo.
  rpc UpdateConfig (UpdateConfigRequest)
      returns (UpdateConfigResponse) {}
}
*/

/*
service MetricsService {
}
*/

// Registration details a RM supplies to the RegisterResourceManager RPC.
message RegisterResourceManagerRequest {
  // An ID which uniquely identifies a RM **cluster**. (For example, if a RM
  // cluster has multiple manager instances for HA purposes, they should all
  // use the same information when registering.)
  // If a RM registers with the same ID, all previous scheduling state in
  // memory will be cleaned up, and the RM is expected to report its full
  // scheduling state after registration.
  string rmID = 1;

  // Version of RM scheduler interface client.
  string version = 2;

  // Policy group name:
  // This defines which policy to use. Policy should be statically configured
  // (think of the network security group concept of EC2).
  // Different RMs can refer to the same policyGroup if their static
  // configuration is identical.
  string policyGroup = 3;
}

// Upon success, the scheduler returns RegisterResourceManagerResponse to the
// RM; otherwise the RM receives an exception.
message RegisterResourceManagerResponse {
  // Intentionally empty.
}

// Allocation asks and releases sent by a RM on the UpdateAllocation stream.
message AllocationRequest {
  // New allocation requests, or replacements for existing allocation requests
  // (if allocationID is the same).
  // NOTE(review): AllocationAsk carries an allocationKey rather than an
  // allocationID; presumably that is the identifier meant here - confirm.
  repeated AllocationAsk asks = 1;

  // Allocations can be released.
  AllocationReleasesRequest releases = 2;

  // ID of the RM; this is used to identify which RM the request comes from.
  string rmID = 3;
}

// Applications a RM wants to add or remove, sent on the UpdateApplication
// stream.
message ApplicationRequest {
  // The RM should explicitly add an application when an allocation request
  // also explicitly belongs to that application.
  // This is optional if the allocation request doesn't belong to an
  // application (independent allocation).
  repeated AddApplicationRequest new = 1;

  // The RM can also remove applications; all allocations/allocation requests
  // associated with the application will be removed.
  repeated RemoveApplicationRequest remove = 2;

  // ID of the RM; this is used to identify which RM the request comes from.
  string rmID = 3;
}

// Node information sent by a RM on the UpdateNode stream.
message NodeRequest {
  // New nodes that can be scheduled. If a node is notified to be
  // "unschedulable", it needs to be part of this field as well.
  repeated NodeInfo nodes = 1;

  // ID of the RM; this is used to identify which RM the request comes from.
  string rmID = 2;
}

// Allocation changes returned on the UpdateAllocation stream.
message AllocationResponse {
  // New allocations.
  repeated Allocation new = 1;

  // Released allocations. This could be an ack from the scheduler when the RM
  // asks to terminate some allocations, or it could be a decision made by the
  // scheduler (such as preemption or timeout).
  repeated AllocationRelease released = 2;

  // Released allocation asks (placeholder), when the placeholder allocation
  // times out.
  repeated AllocationAskRelease releasedAsks = 3;

  // Rejected allocation requests.
  repeated RejectedAllocationAsk rejected = 4;
}

// Application outcomes returned on the UpdateApplication stream.
message ApplicationResponse {
  // Rejected applications.
  repeated RejectedApplication rejected = 1;

  // Accepted applications.
  repeated AcceptedApplication accepted = 2;

  // Updated applications.
  repeated UpdatedApplication updated = 3;
}

// Node registration outcomes returned on the UpdateNode stream.
message NodeResponse {
  // Rejected node registrations.
  repeated RejectedNode rejected = 1;

  // Accepted node registrations.
  repeated AcceptedNode accepted = 2;
}

// Describes a state change of a single application.
message UpdatedApplication {
  // The application ID that was updated.
  string applicationID = 1;

  // State of the application.
  string state = 2;

  // Timestamp of the state transition.
  // NOTE(review): the unit and epoch are not stated in this file - confirm
  // with the producer before interpreting the value.
  int64 stateTransitionTimestamp = 3;

  // Detailed message.
  string message = 4;
}

// Identifies an application that was rejected, and why.
message RejectedApplication {
  // The application ID that was rejected.
  string applicationID = 1;

  // A human-readable reason message.
  string reason = 2;
}

// Identifies an application that was accepted.
message AcceptedApplication {
  // The application ID that was accepted.
  string applicationID = 1;
}

// Identifies a node that was rejected, and why.
message RejectedNode {
  // The node ID that was rejected.
  string nodeID = 1;

  // A human-readable reason message.
  string reason = 2;
}

// Identifies a node that was accepted.
message AcceptedNode {
  // The node ID that was accepted.
  string nodeID = 1;
}

// Priority expressed either as a numeric value or as a named priority class.
// The two forms are members of one oneof, so at most one of them is set.
message Priority {
  oneof priority {
    // Priority of each ask; higher is more important.
    // How to deal with priority is handled by each scheduler implementation.
    int32 priorityValue = 1;

    // PriorityClass is used for app owners to set named priorities. This is a
    // portable way for app owners to have a consistent way to set up priority
    // across clusters.
    string priorityClassName = 2;
  }
}

// A sparse map of resource to Quantity.
message Resource {
  // Quantities keyed by a string resource identifier.
  map<string, Quantity> resources = 1;
}

// Quantity includes a single int64 value.
message Quantity {
  // The amount, as a signed 64-bit integer.
  int64 value = 1;
}

// A request for one or more allocations on behalf of an application.
message AllocationAsk {
  // Allocation key is used by both the scheduler and the RM to track
  // allocations. It doesn't have to be the same as the RM's internal
  // allocation ID (such as the Pod name of K8s or the ContainerID of YARN).
  // Allocations from the same AllocationAsk which are returned to the RM at
  // the same time will have the same allocationKey.
  // The request is considered an update of the existing AllocationAsk if an
  // AllocationAsk with the same allocationKey already exists.
  string allocationKey = 1;

  // The application ID this allocation ask belongs to.
  string applicationID = 2;

  // The partition the application belongs to.
  string partitionName = 3;

  // The amount of resources per ask.
  Resource resourceAsk = 4;

  // Maximum number of allocations.
  int32 maxAllocations = 5;

  // Priority of ask.
  Priority priority = 6;

  // Execution timeout: how long after being allocated by the scheduler this
  // allocation will be terminated (by the scheduler). 0 or a negative value
  // means never expire.
  int64 executionTimeoutMilliSeconds = 7;

  // A set of tags for this specific AllocationAsk. Allocation level tags are
  // used in placing this specific ask on nodes in the cluster. These tags are
  // used in the PlacementConstraints.
  // These tags are optional.
  map<string, string> tags = 8;

  // The name of the TaskGroup this ask belongs to.
  string taskGroupName = 9;

  // Is this a placeholder ask (true) or a real ask (false); defaults to false.
  // Ignored if the taskGroupName is not set.
  bool placeholder = 10;
}

// Describes an application that a RM wants to add.
message AddApplicationRequest {
  // The ID of the application; must be unique.
  string applicationID = 1;

  // The queue this application is requesting. The scheduler will place the
  // application into a queue according to policy, taking into account the
  // requested queue as per the policy.
  string queueName = 2;

  // The partition the application belongs to.
  string partitionName = 3;

  // The user group information of the application owner.
  UserGroupInformation ugi = 4;

  // A set of tags for the application. These tags provide application level
  // generic information.
  // The tags are optional and are used in placing an application or
  // scheduling.
  // Application tags are not considered when processing AllocationAsks.
  map<string, string> tags = 5;

  // Execution timeout: how long this application can be in a running state.
  // 0 or a negative value means never expire.
  int64 executionTimeoutMilliSeconds = 6;

  // The total amount of resources gang placeholders will request.
  Resource placeholderAsk = 7;

  // Gang scheduling style can be hard (the application will fail after the
  // placeholder timeout) or soft (after the timeout the application will be
  // scheduled as a normal application).
  string gangSchedulingStyle = 8;
}

// Identifies an application that a RM wants to remove.
message RemoveApplicationRequest {
  // The ID of the application to remove.
  string applicationID = 1;

  // The partition the application belongs to.
  string partitionName = 2;
}

// A user together with the groups that user belongs to.
message UserGroupInformation {
  // The user name.
  string user = 1;

  // The list of groups of the user; can be empty.
  repeated string groups = 2;
}

// A single allocation made for an AllocationAsk.
message Allocation {
  // Field number 4 is not assigned to any field in this message. It is
  // reserved so that it cannot be picked up by a new field by accident.
  // NOTE(review): presumably 4 belonged to a field that was removed - confirm
  // against the schema history.
  reserved 4;

  // AllocationKey from AllocationAsk.
  string allocationKey = 1;

  // Allocation tags from AllocationAsk.
  map<string, string> allocationTags = 2;

  // UUID of the allocation.
  string UUID = 3;

  // Resource for each allocation.
  Resource resourcePerAlloc = 5;

  // Priority of ask.
  Priority priority = 6;

  // Queue which the allocation belongs to.
  string queueName = 7;

  // Node which the allocation belongs to.
  string nodeID = 8;

  // The ID of the application.
  string applicationID = 9;

  // Partition of the allocation.
  string partitionName = 10;

  // The name of the TaskGroup this allocation belongs to.
  string taskGroupName = 11;

  // Is this a placeholder allocation (true) or a real allocation (false);
  // defaults to false. Ignored if the taskGroupName is not set.
  bool placeholder = 12;
}

// Groups the allocations and the allocation asks that should be released.
message AllocationReleasesRequest {
  // The allocations to release.
  repeated AllocationRelease allocationsToRelease = 1;

  // The asks to release.
  repeated AllocationAskRelease allocationAsksToRelease = 2;
}

// Why an allocation or allocation ask was released. Each value notes which
// side creates a release of that type.
enum TerminationType {
  // TerminationType not set.
  UNKNOWN_TERMINATION_TYPE = 0;

  // Stopped or killed by ResourceManager (created by RM).
  STOPPED_BY_RM = 1;

  // Timed out based on the executionTimeoutMilliSeconds (created by core).
  TIMEOUT = 2;

  // Preempted allocation by scheduler (created by core).
  PREEMPTED_BY_SCHEDULER = 3;

  // Placeholder allocation replaced by real allocation (created by core).
  PLACEHOLDER_REPLACED = 4;
}

// Release allocation: this is a bidirectional message. The TerminationType
// defines the origin, or creator, as per the comment on each enum value. The
// confirmation or response from the receiver is the same message with the
// same termination type set.
message AllocationRelease {
  // The name of the partition the allocation belongs to.
  string partitionName = 1;

  // The application the allocation belongs to.
  string applicationID = 2;

  // The UUID of the allocation to release; if not set, all allocations are
  // released for the applicationID.
  string UUID = 3;

  // Termination type of the released allocation.
  TerminationType terminationType = 4;

  // Human-readable message.
  string message = 5;
}

// Release ask.
message AllocationAskRelease {
  // Which partition to release the ask from; required.
  string partitionName = 1;

  // Optional. When this is set, filter allocation keys by application ID.
  // When the application ID is set and the allocation key is not set, release
  // all allocation keys under the application ID.
  string applicationID = 2;

  // Optional. When this is set, only release the allocation ask with the
  // specified allocation key.
  // The all-lowercase spelling "allocationkey" differs from the
  // "allocationKey" spelling used by other messages; it is left unchanged
  // because renaming the field would change generated code and JSON names.
  string allocationkey = 3;

  // Termination type of the released allocation ask.
  TerminationType terminationType = 4;

  // Human-readable message.
  string message = 5;
}

// Describes a node as reported by the RM, together with the action the
// scheduler should perform for it.
message NodeInfo {
  // Action from RM.
  enum ActionFromRM {
    // ActionFromRM not set.
    UNKNOWN_ACTION_FROM_RM = 0;

    // Create node.
    CREATE = 1;

    // Update node resources, attributes.
    UPDATE = 2;

    // Do not allocate new allocations on the node.
    DRAIN_NODE = 3;

    // Decommission node: it will immediately stop allocations on the node and
    // remove the node from schedulable lists.
    // (The value name keeps its existing spelling; renaming it would change
    // generated code.)
    DECOMISSION = 4;

    // From draining state to SCHEDULABLE state.
    // If the node is not in draining state, an error will be thrown.
    DRAIN_TO_SCHEDULABLE = 5;
  }

  // ID of node; the node must exist to be updated.
  string nodeID = 1;

  // Action to perform by the scheduler.
  ActionFromRM action = 2;

  // New attributes of the node, which will replace previously reported
  // attributes.
  map<string, string> attributes = 3;

  // New schedulable resource; the scheduler may preempt allocations on the
  // node or schedule more allocations accordingly.
  Resource schedulableResource = 4;

  // When the scheduler co-exists with some other schedulers, some node
  // resources might be occupied (allocated) by other schedulers.
  Resource occupiedResource = 5;

  // Allocated resources; this will be added when the node is registered to
  // the RM (recovery).
  repeated Allocation existingAllocations = 6;
}

// Identifies an allocation ask that was rejected, and why.
message RejectedAllocationAsk {
  // The allocation key of the rejected ask.
  string allocationKey = 1;

  // The ID of the application.
  string applicationID = 2;

  // A human-readable reason message.
  string reason = 3;
}

// Arguments for the predicates function that checks a container against a
// node.
message PredicatesArgs {
  // Allocation key identifies a container; the predicates function is going
  // to check if this container is eligible to be placed onto a node.
  string allocationKey = 1;

  // The node ID the container is assigned to.
  string nodeID = 2;

  // Run the predicates for allocations (true) or reservations (false).
  bool allocate = 3;
}

// Allocations to assume and to forget when re-syncing the scheduler cache.
message ReSyncSchedulerCacheArgs {
  // A list of assumed allocations; this will be synced to the scheduler
  // cache.
  repeated AssumedAllocation assumedAllocations = 1;

  // A list of allocations to forget.
  repeated ForgotAllocation forgetAllocations = 2;
}

// A container that is assumed to be allocated to a node.
message AssumedAllocation {
  // Allocation key used to identify a container.
  string allocationKey = 1;

  // The node ID the container is assumed to be allocated to; this info is
  // stored in the scheduler cache.
  string nodeID = 2;
}

// A container whose allocation should be forgotten.
message ForgotAllocation {
  // Allocation key used to identify a container.
  string allocationKey = 1;
}

// Reports the scheduling state of a single container.
message UpdateContainerSchedulingStateRequest {
  // Container scheduling states.
  enum SchedulingState {
    // SchedulingState not set.
    UNKNOWN_SCHEDULING_STATE = 0;

    // The container is being skipped by the scheduler.
    SKIPPED = 1;

    // The container is scheduled and it has been assigned to a node.
    SCHEDULED = 2;

    // The container is reserved on some node, but not yet assigned.
    RESERVED = 3;

    // The scheduler has visited all candidate nodes for this container
    // but none of them could satisfy this container's requirement.
    FAILED = 4;
  }

  // Application ID.
  // The field name is misspelled ("applicartionID"); it is left unchanged
  // because renaming it would change generated code and JSON names.
  string applicartionID = 1;

  // Allocation key used to identify a container.
  string allocationKey = 2;

  // Container scheduling state.
  SchedulingState state = 3;

  // An optional plain message to explain why it is in such a state.
  string reason = 4;
}

// Carries a new configuration to be saved.
message UpdateConfigurationRequest {
  // New config that needs to be saved.
  string configs = 1;
}

// Outcome of a configuration update.
message UpdateConfigurationResponse {
  // Flag that marks the config update as a success or a failure.
  bool success = 1;

  // The old configuration that was changed.
  string oldConfig = 2;

  // Reason in case of failure.
  string reason = 3;
}

// An event associated with a request, application, node or queue.
message EventRecord {
  // Kind of object an event can be associated with.
  enum Type {
    // EventRecord Type not set.
    UNKNOWN_EVENTRECORD_TYPE = 0;
    REQUEST = 1;
    APP = 2;
    NODE = 3;
    QUEUE = 4;
  }

  // The type of the object associated with the event.
  Type type = 1;

  // ID of the object associated with the event.
  string objectID = 2;

  // The group this object belongs to.
  // It specifies the application ID for allocations and the queue for
  // applications.
  string groupID = 3;

  // The reason of this event.
  string reason = 4;

  // The detailed message as string.
  string message = 5;

  // Timestamp of the event.
  // NOTE(review): the field name suggests nanoseconds; the unit and epoch are
  // not stated in this file - confirm with the producer.
  int64 timestampNano = 6;
}