| |
| // Code generated by make build. DO NOT EDIT |
| |
| syntax = "proto3"; |
| package si.v1; |
| |
| import "google/protobuf/descriptor.proto"; |
| |
| option go_package = "si"; |
| |
// Custom field option used to mark fields carrying sensitive data.
extend google.protobuf.FieldOptions {
  // Indicates that a field MAY contain information that is sensitive
  // and MUST be treated as such (e.g. not logged, not exposed in APIs).
  bool si_secret = 1059;
}
// RPC entry points the scheduler core exposes to resource managers (RMs).
service Scheduler {
  // Register an RM. If it is a reconnect from a previous RM, the call will
  // trigger a cleanup of all in-memory data and a resync with the RM.
  rpc RegisterResourceManager (RegisterResourceManagerRequest)
      returns (RegisterResourceManagerResponse) { }

  // Bidirectional update stream: the RM sends scheduler status updates
  // (node status updates, allocation request updates, etc.) and receives
  // updates from the scheduler for allocation changes, any required status
  // changes, etc.
  rpc Update (stream UpdateRequest)
      returns (stream UpdateResponse) { }
}
| |
| /* |
| service AdminService { |
| // Include |
| // addQueueInfo. |
| // removeQueueInfo. |
| // updateQueueInfo. |
| rpc UpdateConfig (UpdateConfigRequest) |
| returns (UpdateConfigResponse) {} |
| } |
| */ |
| |
| /* |
| service MetricsService { |
| } |
| */ |
// Sent by an RM to register (or re-register) itself with the scheduler.
message RegisterResourceManagerRequest {
  // An ID which can uniquely identify an RM **cluster**. (For example, if an
  // RM cluster has multiple manager instances for HA purposes, they should use
  // the same information when doing registration.)
  // If an RM registers with the same ID, all previous scheduling state in
  // memory will be cleaned up, and the RM is expected to report full
  // scheduling state after registration.
  string rmID = 1;

  // Version of the RM scheduler-interface client.
  string version = 2;

  // Policy group name:
  // This defines which policy to use. Policies should be statically configured
  // (think of the network security group concept of EC2).
  // Different RMs can refer to the same policyGroup if their static
  // configuration is identical.
  string policyGroup = 3;
}
| |
// Upon success, the scheduler returns RegisterResourceManagerResponse to the
// RM; otherwise the RM receives an exception.
message RegisterResourceManagerResponse {
  // Intentionally empty: kept as a dedicated response type so fields can be
  // added later without breaking the RPC signature.
}
// A single update message sent from the RM to the scheduler over the
// bidirectional Update stream.
message UpdateRequest {
  // Field number 7 is unused (the numbering jumps from 6 to 8); reserve it so
  // it cannot be accidentally reused later with different semantics, which
  // would be a wire-compatibility hazard.
  reserved 7;

  // New allocation requests or replace existing allocation requests (if the
  // allocationID is the same).
  repeated AllocationAsk asks = 1;

  // Allocations can be released.
  AllocationReleasesRequest releases = 2;

  // New nodes that can be scheduled. If a node is notified to be
  // "unschedulable", it needs to be part of this field as well.
  repeated NewNodeInfo newSchedulableNodes = 3;

  // Updates for existing schedulable nodes.
  // May include:
  //   - Node resource changes (like grows/shrinks of node resources).
  //   - Node attribute changes (including node-partition concepts like YARN's,
  //     and concepts like "local images").
  //
  // Should not include:
  //   - Allocation-related changes on the node.
  //   - Realtime utilizations.
  repeated UpdateNodeInfo updatedNodes = 4;

  // UtilizationReports for allocations and nodes.
  repeated UtilizationReport utilizationReports = 5;

  // ID of the RM; used to identify which RM the request comes from.
  string rmID = 6;

  // The RM should explicitly add an application when an allocation request
  // also explicitly belongs to an application.
  // This is optional if the allocation request doesn't belong to an
  // application (independent allocation).
  repeated AddApplicationRequest newApplications = 8;

  // The RM can also remove applications; all allocations / allocation
  // requests associated with the application will be removed.
  repeated RemoveApplicationRequest removeApplications = 9;
}
| |
// A single update message sent from the scheduler to the RM over the
// bidirectional Update stream.
message UpdateResponse {
  // Control actions the scheduler can send to the RM.
  enum ActionFromScheduler {
    // Nothing needs to be done.
    NOACTION = 0;

    // Something is wrong; the RM needs to stop and re-register with the
    // scheduler.
    RESYNC = 1;
  }

  // What the RM needs to do; the scheduler can send a control code to the RM
  // when something goes wrong.
  // Don't use/expand this field for other general-purpose actions
  // (like killing a remote container process).
  ActionFromScheduler action = 1;

  // New allocations
  repeated Allocation newAllocations = 2;

  // Released allocations; this could be either an ack from the scheduler when
  // the RM asks to terminate some allocations, or a decision made by the
  // scheduler (such as preemption or timeout).
  repeated AllocationRelease releasedAllocations = 3;

  // Released allocation asks (placeholders), when the placeholder allocation
  // times out.
  repeated AllocationAskRelease releasedAllocationAsks = 4;

  // Rejected allocation requests
  repeated RejectedAllocationAsk rejectedAllocations = 5;

  // Suggested node updates.
  // This could include:
  // 1) Schedulable resources on each node. This can be used when we want to
  //    run two resource management systems side-by-side. For example, YARN/K8s
  //    running side by side, updating YARN NodeManager / Kubelet resources
  //    dynamically.
  // 2) Other recommendations.
  repeated NodeRecommendation nodeRecommendations = 6;

  // Rejected Applications
  repeated RejectedApplication rejectedApplications = 7;

  // Accepted Applications
  repeated AcceptedApplication acceptedApplications = 8;

  // Updated Applications
  repeated UpdatedApplication updatedApplications = 9;

  // Rejected Node Registrations
  repeated RejectedNode rejectedNodes = 10;

  // Accepted Node Registrations
  repeated AcceptedNode acceptedNodes = 11;
}
| |
// Notifies the RM that an application's state changed in the scheduler core.
message UpdatedApplication {
  // The application ID that was updated
  string applicationID = 1;
  // State of the application
  string state = 2;
  // Timestamp of the state transition
  // NOTE(review): units (seconds/ms/ns since epoch) are not specified here —
  // confirm against the producer before relying on them.
  int64 stateTransitionTimestamp = 3;
  // Detailed message
  string message = 4;
}
| |
// Notifies the RM that an application registration was rejected.
message RejectedApplication {
  // The application ID that was rejected
  string applicationID = 1;
  // A human-readable reason message
  string reason = 2;
}
| |
// Notifies the RM that an application registration was accepted.
message AcceptedApplication {
  // The application ID that was accepted
  string applicationID = 1;
}
| |
// Notifies the RM that a node registration was rejected.
message RejectedNode {
  // The node ID that was rejected
  string nodeID = 1;
  // A human-readable reason message
  string reason = 2;
}
| |
// Notifies the RM that a node registration was accepted.
message AcceptedNode {
  // The node ID that was accepted
  string nodeID = 1;
}
// Priority of an ask, expressed either numerically or by a named class.
message Priority {
  // Exactly one of the two representations is set.
  oneof priority {
    // Priority of each ask; higher is more important.
    // How to deal with priority is handled by each scheduler implementation.
    int32 priorityValue = 1;

    // PriorityClass is used by app owners to set named priorities. This is a
    // portable way for app owners to have a consistent way to set up priority
    // across clusters.
    string priorityClassName = 2;
  }
}
| |
// A sparse map of resource name to Quantity.
message Resource {
  // Keys are resource names (e.g. memory, vcore); absent keys mean zero.
  map<string, Quantity> resources = 1;
}
| |
// Quantity wraps a single int64 value.
message Quantity {
  // The amount of the resource; units are defined by the resource name it is
  // keyed under in Resource.
  int64 value = 1;
}
// A request from the RM for the scheduler to make one or more allocations.
message AllocationAsk {
  // The allocation key is used by both the scheduler and the RM to track
  // allocations.
  // It doesn't have to be the same as the RM's internal allocation id (such as
  // a Pod name in K8s or a ContainerID in YARN).
  // Allocations from the same AllocationAsk which are returned to the RM at
  // the same time will have the same allocationKey.
  // The request is considered an update of the existing AllocationAsk if an
  // AllocationAsk with the same allocationKey already exists.
  string allocationKey = 1;
  // The application ID this allocation ask belongs to
  string applicationID = 2;
  // The partition the application belongs to
  string partitionName = 3;
  // The amount of resources per ask
  Resource resourceAsk = 4;
  // Maximum number of allocations
  int32 maxAllocations = 5;
  // Priority of ask
  Priority priority = 6;
  // Execution timeout: how long before this allocation will be terminated (by
  // the scheduler) once allocated; 0 or a negative value means never expire.
  int64 executionTimeoutMilliSeconds = 7;
  // A set of tags for this specific AllocationAsk. Allocation-level tags are
  // used in placing this specific ask on nodes in the cluster. These tags are
  // used in the PlacementConstraints.
  // These tags are optional.
  map<string, string> tags = 8;
  // Placement constraint defines how this allocation should be placed in the
  // cluster. If not set, no placement constraint will be applied.
  PlacementConstraint placementConstraint = 9;
  // The name of the TaskGroup this ask belongs to
  string taskGroupName = 10;
  // Is this a placeholder ask (true) or a real ask (false), defaults to false;
  // ignored if the taskGroupName is not set
  bool placeholder = 11;
}
// Registers a new application with the scheduler.
message AddApplicationRequest {
  // The ID of the application, must be unique
  string applicationID = 1;
  // The queue this application is requesting. The scheduler will place the
  // application into a queue according to policy, taking into account the
  // requested queue as per the policy.
  string queueName = 2;
  // The partition the application belongs to
  string partitionName = 3;
  // The user group information of the application owner
  UserGroupInformation ugi = 4;
  // A set of tags for the application. These tags provide application-level
  // generic information.
  // The tags are optional and are used in placing an application or
  // scheduling.
  // Application tags are not considered when processing AllocationAsks.
  map<string, string> tags = 5;
  // Execution timeout: how long this application can be in a running state;
  // 0 or a negative value means never expire.
  int64 executionTimeoutMilliSeconds = 6;
  // The total amount of resources gang placeholders will request
  Resource placeholderAsk = 7;
}
| |
// Removes an application; all allocations and allocation requests associated
// with it are removed as well (see UpdateRequest.removeApplications).
message RemoveApplicationRequest {
  // The ID of the application to remove
  string applicationID = 1;
  // The partition the application belongs to
  string partitionName = 2;
}
// Identity of the application owner.
message UserGroupInformation {
  // the user name
  string user = 1;
  // the list of groups of the user, can be empty
  repeated string groups = 2;
}
// PlacementConstraint could hold a SimplePlacementConstraint or a
// CompositePlacementConstraint. One of them will be set.
message PlacementConstraint {
  oneof constraint {
    SimplePlacementConstraint simpleConstraint = 1;

    // This protocol can be extended to support complex constraints.
    // To allow an easier scheduler implementation and avoid confusing users,
    // protocol related to CompositePlacementConstraints is commented out and
    // only kept for reference.
    // CompositePlacementConstraint compositeConstraint = 2;
  }
}
| |
// A simple placement constraint represents constraints for affinity /
// anti-affinity to node attributes or allocation tags.
// When both NodeAffinityConstraints and AllocationAffinityConstraints are
// specified, both will be checked and verified while scheduling.
message SimplePlacementConstraint {
  // Affinity/anti-affinity to node attributes.
  NodeAffinityConstraints nodeAffinityConstraint = 1;
  // Affinity/anti-affinity to other allocations.
  // NOTE(review): the field name ends in "Attribute" while its type is a
  // "Constraints" message — renaming would be source-breaking for generated
  // code, so it is only flagged here.
  AllocationAffinityConstraints allocationAffinityAttribute = 2;
}
| |
// Affinity to nodes. Multiple AffinityTargetExpressions can be specified;
// they are combined with AND.
message NodeAffinityConstraints {
  // NOTE(review): field number 1 is unused in this message; consider adding
  // `reserved 1;` to prevent accidental reuse.
  repeated AffinityTargetExpression targetExpressions = 2;
}
| |
// Affinity to allocations (containers).
// Affinity is single-direction, which means if the RM wants mutual affinity /
// anti-affinity between allocations, the same constraints need to be added
// to all allocation asks.
message AllocationAffinityConstraints {
  // Scope: scope is the key of a node attribute, which determines whether
  // >1 allocations are in the same group or not.
  // Allocations on node(s) which have the same node attribute value
  // for the given node attribute key == scope are in the same group.
  //
  // e.g. when a user wants to do anti-affinity between allocations on a
  // per-node basis, scope can be set to "hostname", max-cardinality = 1.
  string scope = 1;
  // NOTE(review): "tragetExpressions" is a typo for "targetExpressions".
  // Renaming is wire-safe but breaks generated-code accessors and JSON names,
  // so it is only flagged here.
  repeated AffinityTargetExpression tragetExpressions = 2;
  // Minimum number of matching allocations in the group.
  int32 minCardinality = 3;
  // Maximum number of matching allocations in the group.
  int32 maxCardinality = 4;

  // Is this a required (hard) or preferred (soft) request.
  bool required = 5;
}
| |
// A single match expression used by node / allocation affinity constraints.
message AffinityTargetExpression {
  // The following 4 operators can be specified; the default is "IN".
  // When EXIST/NOT_EXIST is specified, the scheduler only checks whether the
  // given targetKey appears in the node attributes or allocation tags.
  enum AffinityTargetOperator {
    IN = 0;
    NOT_IN = 1;
    EXIST = 2;
    NOT_EXIST = 3;
  }

  // Operator applied to targetKey/targetValues; defaults to IN (enum zero
  // value).
  // BUGFIX: this field was declared with type AffinityTargetExpression (the
  // containing message) instead of the AffinityTargetOperator enum, which was
  // otherwise unused and is what the name and leading comment describe.
  AffinityTargetOperator targetOperator = 1;
  // Node attribute key or allocation tag key to match against.
  string targetKey = 2;
  // Values compared against targetKey; not used for EXIST/NOT_EXIST.
  repeated string targetValues = 3;
}
// An allocation made by the scheduler in response to an AllocationAsk.
message Allocation {
  // Field number 4 is unused (the numbering jumps from 3 to 5); reserve it so
  // it cannot be accidentally reused later with different semantics.
  reserved 4;

  // AllocationKey from the AllocationAsk
  string allocationKey = 1;
  // Allocation tags from the AllocationAsk
  map<string, string> allocationTags = 2;
  // UUID of the allocation
  string UUID = 3;
  // Resource for each allocation
  Resource resourcePerAlloc = 5;
  // Priority of ask
  Priority priority = 6;
  // Queue which the allocation belongs to
  string queueName = 7;
  // Node which the allocation belongs to
  string nodeID = 8;
  // The ID of the application
  string applicationID = 9;
  // Partition of the allocation
  string partitionName = 10;
  // The name of the TaskGroup this allocation belongs to
  string taskGroupName = 11;
  // Is this a placeholder allocation (true) or a real allocation (false),
  // defaults to false; ignored if the taskGroupName is not set
  bool placeholder = 12;
}
// Bundles the allocations and allocation asks the RM wants released.
message AllocationReleasesRequest {
  // The allocations to release
  repeated AllocationRelease allocationsToRelease = 1;
  // The asks to release
  repeated AllocationAskRelease allocationAsksToRelease = 2;
}
| |
// Why an allocation (or ask) was terminated, and which side originated the
// release.
// NOTE(review): the zero value carries business meaning (STOPPED_BY_RM),
// against the usual "<TYPE>_UNSPECIFIED = 0" convention; changing it now
// would be wire-breaking, so it is only flagged here.
enum TerminationType {
  STOPPED_BY_RM = 0;          // Stopped or killed by ResourceManager (created by RM)
  TIMEOUT = 1;                // Timed out based on the executionTimeoutMilliSeconds (created by core)
  PREEMPTED_BY_SCHEDULER = 2; // Preempted allocation by scheduler (created by core)
  PLACEHOLDER_REPLACED = 3;   // Placeholder allocation replaced by real allocation (created by core)
}
| |
// Release allocation: this is a bidirectional message. The TerminationType
// defines the origin, or creator, as per the comments on TerminationType. The
// confirmation or response from the receiver is the same message with the
// same termination type set.
message AllocationRelease {

  // The name of the partition the allocation belongs to
  string partitionName = 1;
  // The application the allocation belongs to
  string applicationID = 2;
  // The UUID of the allocation to release; if not set, all allocations are
  // released for the applicationID
  string UUID = 3;
  // Termination type of the released allocation
  TerminationType terminationType = 4;
  // human-readable message
  string message = 5;
}
| |
// Release ask: bidirectional, mirroring AllocationRelease.
message AllocationAskRelease {
  // Which partition to release the ask from, required.
  string partitionName = 1;
  // optional; when this is set, filter allocation keys by application id.
  // When application id is set and allocationKey is not set, release all
  // allocation keys under the application id.
  string applicationID = 2;
  // optional; when this is set, only release the allocation ask with the
  // specified key.
  // NOTE(review): "allocationkey" breaks the file's camelCase convention
  // (cf. allocationKey elsewhere) — renaming is source-breaking for generated
  // code, so it is only flagged here.
  string allocationkey = 3;
  // Termination type of the released allocation ask
  TerminationType terminationType = 4;
  // For human-readable message
  string message = 5;
}
// Registers a new schedulable node with the scheduler.
message NewNodeInfo {
  // ID of node, must be unique
  string nodeID = 1;
  // node attributes
  map<string, string> attributes = 2;
  // Schedulable Resource
  Resource schedulableResource = 3;
  // Occupied Resource (resources allocated by other, co-existing schedulers)
  Resource occupiedResource = 4;
  // Allocated resources; these will be added when the node registers with the
  // RM (recovery).
  repeated Allocation existingAllocations = 5;
}
// Updates an existing schedulable node (resources, attributes, or lifecycle
// action).
message UpdateNodeInfo {
  // Action from RM
  enum ActionFromRM {
    // Update node resources, attributes.
    UPDATE = 0;

    // Do not allocate new allocations on the node.
    DRAIN_NODE = 1;

    // Decommission the node: immediately stop allocations on the node and
    // remove the node from the schedulable lists.
    // NOTE(review): the value name is missing an "m" (DECOMISSION); renaming
    // it would break generated code, so it is only flagged here.
    DECOMISSION = 2;

    // From DRAINING state back to SCHEDULABLE state.
    // If the node is not in draining state, an error will be thrown.
    DRAIN_TO_SCHEDULABLE = 3;
  }

  // ID of node; the node must exist to be updated
  string nodeID = 1;
  // New attributes of the node, which will replace previously reported
  // attributes.
  map<string, string> attributes = 2;
  // New schedulable resource; the scheduler may preempt allocations on the
  // node or schedule more allocations accordingly.
  Resource schedulableResource = 3;
  // When the scheduler co-exists with some other schedulers, some node
  // resources might be occupied (allocated) by those other schedulers.
  Resource occupiedResource = 4;
  // Action to perform by the scheduler
  ActionFromRM action = 5;
}
// Realtime utilization report for a node or an allocation.
message UtilizationReport {
  // It could be either a nodeID or an allocation UUID.
  string ID = 1;

  // Actual used resource
  Resource actualUsedResource = 2;
}
// Notifies the RM that an AllocationAsk was rejected by the scheduler.
message RejectedAllocationAsk {
  // Key of the rejected ask (see AllocationAsk.allocationKey).
  string allocationKey = 1;
  // The ID of the application
  string applicationID = 2;
  // A human-readable reason message
  string reason = 3;
}
// A suggested node update from the scheduler (see
// UpdateResponse.nodeRecommendations).
message NodeRecommendation {
  // Recommended schedulable resource for the node.
  Resource recommendedSchedulableResource = 1;

  // Any other human-readable message
  string message = 2;
}
// Arguments for the RM-side predicates check.
message PredicatesArgs {
  // The allocation key identifies a container; the predicates function is
  // going to check if this container is eligible to be placed onto a node.
  string allocationKey = 1;
  // the node ID the container is assigned to.
  string nodeID = 2;
  // run the predicates for allocations (true) or reservations (false)
  bool allocate = 3;
}
| |
// Arguments used to re-sync the RM-side scheduler cache.
message ReSyncSchedulerCacheArgs {
  // a list of assumed allocations; this will be sync'd to the scheduler cache.
  repeated AssumedAllocation assumedAllocations = 1;
  // a list of allocations to forget
  repeated ForgotAllocation forgetAllocations = 2;
}
| |
// An allocation assumed (optimistically recorded) in the scheduler cache.
message AssumedAllocation {
  // allocation key used to identify a container.
  string allocationKey = 1;
  // the node ID the container is assumed to be allocated to; this info is
  // stored in the scheduler cache.
  string nodeID = 2;
}
| |
// An allocation to be removed (forgotten) from the scheduler cache.
message ForgotAllocation {
  // allocation key used to identify a container.
  string allocationKey = 1;
}
| |
// Reports a container's scheduling state transition to the RM.
message UpdateContainerSchedulingStateRequest {
  // container scheduling states
  enum SchedulingState {
    // the container is being skipped by the scheduler
    SKIPPED = 0;
    // the container is scheduled and it has been assigned to a node
    SCHEDULED = 1;
    // the container is reserved on some node, but not yet assigned
    RESERVED = 2;
    // the scheduler has visited all candidate nodes for this container
    // but none of them could satisfy this container's requirement
    FAILED = 3;
  }

  // application ID
  // NOTE(review): "applicartionID" is a typo for "applicationID"; renaming is
  // source-breaking for generated code, so it is only flagged here.
  string applicartionID = 1;

  // allocation key used to identify a container.
  string allocationKey = 2;

  // container scheduling state
  SchedulingState state = 3;

  // an optional plain message to explain why it is in such state
  string reason = 4;
}
| |
// Asks the scheduler to replace its configuration.
message UpdateConfigurationRequest {
  // The new configuration that needs to be saved, as a single string blob.
  string configs = 1;
}
| |
// Result of an UpdateConfigurationRequest.
message UpdateConfigurationResponse {
  // flag that marks the config update success or failure
  bool success = 1;

  // the old configuration that was changed
  string oldConfig = 2;

  // reason in case of failure
  string reason = 3;
}
// A single diagnostic/audit event emitted by the scheduler.
message EventRecord {
  // The kind of object the event is about.
  enum Type {
    REQUEST = 0;
    APP = 1;
    NODE = 2;
    QUEUE = 3;
  }

  // the type of the object associated with the event
  Type type = 1;
  // ID of the object associated with the event
  string objectID = 2;
  // the group this object belongs to;
  // it specifies the application ID for allocations and the queue for
  // applications
  string groupID = 3;
  // the reason for this event
  string reason = 4;
  // the detailed message as string
  string message = 5;
  // timestamp of the event, in nanoseconds (per the field name)
  int64 timestampNano = 6;
}
// Auto-scaling metrics at a certain point in time.
message AutoScalingMetrics {
  // a list of outstanding requests that desire additional resources
  repeated OutstandingResourceRequest outstandingRequests = 1;
}
| |
// A pending resource request that could not be satisfied with current
// capacity; input to auto-scaling decisions.
message OutstandingResourceRequest {
  // a unique ID
  string requestID = 1;
  // resource specification
  Resource resource = 2;
  // an arbitrary map of tags; this stores some useful information that can
  // help the decision
  map<string, string> tags = 3;
}