| /* |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| package org.apache.slider.server.appmaster.state; |
| |
| /** |
| * Information about the state of a role on a specific node instance. |
| * No fields are synchronized; sync on the instance to work with it |
| <p> |
| The two fields `releasing` and `requested` are used to track the ongoing |
| state of YARN requests; they do not need to be persisted across stop/start |
| cycles. They may be relevant across AM restart, but without other data |
| structures in the AM, not enough to track what the AM was up to before |
| it was restarted. The strategy will be to ignore unexpected allocation |
| responses (which may come from pre-restart) requests, while treating |
| unexpected container release responses as failures. |
| <p> |
| The `active` counter is only decremented after a container release response |
| has been received. |
| <p> |
| |
| Accesses are synchronized. |
| */ |
| public class NodeEntry { |
| |
| public final int rolePriority; |
| |
| public NodeEntry(int rolePriority) { |
| this.rolePriority = rolePriority; |
| } |
| |
| /** |
| * instance explicitly requested on this node: it's OK if an allocation |
| * comes in that has not been (and when that happens, this count should |
| * not drop) |
| */ |
| private int requested; |
| private int starting; |
| private int startFailed; |
| private int failed; |
| private int preempted; |
| /** |
| * Counter of "failed recently" events. These are all failures |
| * which have happened since it was last reset. |
| */ |
| private int failedRecently; |
| /** |
| * Number of live nodes. |
| */ |
| private int live; |
| private int releasing; |
| private long lastUsed; |
| |
| /** |
| * Is the node available for assignments. This does not track |
| * whether or not there are any outstanding requests for this node |
| * @return true if there are no role instances here |
| * other than some being released. |
| */ |
| public synchronized boolean isAvailable() { |
| return getActive() == 0 && (requested == 0) && starting == 0; |
| } |
| |
| /** |
| * return no of active instances -those that could be released as they |
| * are live and not already being released |
| * @return a number, possibly 0 |
| */ |
| public synchronized int getActive() { |
| return (live - releasing); |
| } |
| |
| /** |
| * Return true if the node is not busy, and it |
| * has not been used since the absolute time |
| * @param absoluteTime time |
| * @return true if the node could be cleaned up |
| */ |
| public synchronized boolean notUsedSince(long absoluteTime) { |
| return isAvailable() && lastUsed < absoluteTime; |
| } |
| |
| public synchronized int getLive() { |
| return live; |
| } |
| |
| public int getStarting() { |
| return starting; |
| } |
| |
| /** |
| * Set the live value directly -used on AM restart |
| * @param v value |
| */ |
| public synchronized void setLive(int v) { |
| live = v; |
| } |
| |
| private synchronized void incLive() { |
| ++live; |
| } |
| |
| private synchronized void decLive() { |
| live = RoleHistoryUtils.decToFloor(live); |
| } |
| |
| public synchronized void onStarting() { |
| ++starting; |
| } |
| |
| private void decStarting() { |
| starting = RoleHistoryUtils.decToFloor(starting); |
| } |
| |
| public synchronized void onStartCompleted() { |
| decStarting(); |
| incLive(); |
| } |
| |
| /** |
| * start failed -decrement the starting flag. |
| * @return true if the node is now available |
| */ |
| public synchronized boolean onStartFailed() { |
| decStarting(); |
| ++startFailed; |
| return containerCompleted(false, ContainerOutcome.Failed); |
| } |
| |
| /** |
| * no of requests made of this role of this node. If it goes above |
| * 1 there's a problem |
| */ |
| public synchronized int getRequested() { |
| return requested; |
| } |
| |
| /** |
| * request a node: |
| */ |
| public synchronized void request() { |
| ++requested; |
| } |
| |
| /** |
| * A request made explicitly to this node has completed |
| */ |
| public synchronized void requestCompleted() { |
| requested = RoleHistoryUtils.decToFloor(requested); |
| } |
| |
| /** |
| * No of instances in release state |
| */ |
| public synchronized int getReleasing() { |
| return releasing; |
| } |
| |
| /** |
| * Release an instance -which is no longer marked as active |
| */ |
| public synchronized void release() { |
| releasing++; |
| } |
| |
| /** |
| * completion event, which can be a planned or unplanned |
| * planned: dec our release count |
| * unplanned: dec our live count |
| * @param wasReleased true if this was planned |
| * @param outcome |
| * @return true if this node is now available |
| */ |
| public synchronized boolean containerCompleted(boolean wasReleased, ContainerOutcome outcome) { |
| if (wasReleased) { |
| releasing = RoleHistoryUtils.decToFloor(releasing); |
| } else { |
| // for the node, we use the outcome of the faiure to decide |
| // whether this is potentially "node-related" |
| switch(outcome) { |
| // general "any reason" app failure |
| case Failed: |
| // specific node failure |
| case Node_failure: |
| |
| ++failed; |
| ++failedRecently; |
| break; |
| |
| case Preempted: |
| preempted++; |
| break; |
| |
| // failures which are node-independent |
| case Failed_limits_exceeded: |
| case Completed: |
| default: |
| break; |
| } |
| } |
| decLive(); |
| return isAvailable(); |
| } |
| |
| /** |
| * Time last used. |
| */ |
| public synchronized long getLastUsed() { |
| return lastUsed; |
| } |
| |
| public synchronized void setLastUsed(long lastUsed) { |
| this.lastUsed = lastUsed; |
| } |
| |
| public synchronized int getStartFailed() { |
| return startFailed; |
| } |
| |
| public synchronized int getFailed() { |
| return failed; |
| } |
| |
| public synchronized int getFailedRecently() { |
| return failedRecently; |
| } |
| |
| public synchronized int getPreempted() { |
| return preempted; |
| } |
| |
| /** |
| * Reset the failed recently count. |
| */ |
| public void resetFailedRecently() { |
| failedRecently = 0; |
| } |
| |
| @Override |
| public String toString() { |
| final StringBuilder sb = new StringBuilder("NodeEntry{"); |
| sb.append("priority=").append(rolePriority); |
| sb.append(", requested=").append(requested); |
| sb.append(", starting=").append(starting); |
| sb.append(", live=").append(live); |
| sb.append(", releasing=").append(releasing); |
| sb.append(", lastUsed=").append(lastUsed); |
| sb.append(", failedRecently=").append(failedRecently); |
| sb.append(", preempted=").append(preempted); |
| sb.append(", startFailed=").append(startFailed); |
| sb.append('}'); |
| return sb.toString(); |
| } |
| } |