SLIDER-1188 Make AM agent heartbeat loss configurable / increase default.
diff --git a/slider-core/src/main/java/org/apache/slider/providers/agent/AgentKeys.java b/slider-core/src/main/java/org/apache/slider/providers/agent/AgentKeys.java
index 9ea984c..c7f8df2 100644
--- a/slider-core/src/main/java/org/apache/slider/providers/agent/AgentKeys.java
+++ b/slider-core/src/main/java/org/apache/slider/providers/agent/AgentKeys.java
@@ -97,6 +97,8 @@
String PYTHON_EXE = "python";
String CREATE_DEF_ZK_NODE = "create.default.zookeeper.node";
String HEARTBEAT_MONITOR_INTERVAL = "heartbeat.monitor.interval";
+ String HEARTBEAT_LOST_INTERVAL = "heartbeat.lost.interval"; // option key looked up in the app's global options
+ int DEFAULT_HEARTBEAT_LOST_INTERVAL = 2 * 60 * 60 * 1000; // 2 hours, expressed in milliseconds
String AGENT_INSTANCE_DEBUG_DATA = "agent.instance.debug.data";
String AGENT_OUT_FILE = "slider-agent.out";
String KEY_AGENT_TWO_WAY_SSL_ENABLED = "ssl.server.client.auth";
diff --git a/slider-core/src/main/java/org/apache/slider/providers/agent/AgentProviderService.java b/slider-core/src/main/java/org/apache/slider/providers/agent/AgentProviderService.java
index 2ab5c6f..7f3b04e 100644
--- a/slider-core/src/main/java/org/apache/slider/providers/agent/AgentProviderService.java
+++ b/slider-core/src/main/java/org/apache/slider/providers/agent/AgentProviderService.java
@@ -172,6 +172,7 @@
private final Object syncLock = new Object();
private final ComponentTagProvider tags = new ComponentTagProvider();
private int heartbeatMonitorInterval = 0;
+ private int heartbeatLostInterval = 0;
private AgentClientProvider clientProvider;
private AtomicInteger taskId = new AtomicInteger(0);
private volatile Map<String, MetainfoHolder> metaInfoMap = new HashMap<>();
@@ -230,6 +231,7 @@
super("AgentProviderService");
setAgentRestOperations(this);
setHeartbeatMonitorInterval(DEFAULT_HEARTBEAT_MONITOR_INTERVAL);
+ setHeartbeatLostInterval(DEFAULT_HEARTBEAT_LOST_INTERVAL);
}
@Override
@@ -345,7 +347,8 @@
Map<String, DefaultConfig> defaultConfigs =
initializeDefaultConfigs(fileSystem, appDef, metaInfo);
metaInfoMap.put(mapKey, new MetainfoHolder(metaInfo, defaultConfigs));
- monitor = new HeartbeatMonitor(this, getHeartbeatMonitorInterval());
+ monitor = new HeartbeatMonitor(this, getHeartbeatMonitorInterval(),
+ getHeartbeatLostInterval());
monitor.start();
// build a map from component to metainfo
@@ -1555,7 +1558,8 @@
}
/**
- * Reads and sets the heartbeat monitoring interval. If bad value is provided then log it and set to default.
+ * Reads and sets the heartbeat monitoring interval and heartbeat lost
+ * interval. If bad value is provided then log it and set to default.
*
* @param instanceDefinition
*/
@@ -1572,6 +1576,18 @@
HEARTBEAT_MONITOR_INTERVAL,
DEFAULT_HEARTBEAT_MONITOR_INTERVAL);
}
+ // Heartbeat-lost threshold: taken from the global app options; an unparsable value is logged and replaced by the default.
+ String hbLostInterval = instanceDefinition.getAppConfOperations().
+ getGlobalOptions().getOption(AgentKeys.HEARTBEAT_LOST_INTERVAL,
+ Integer.toString(DEFAULT_HEARTBEAT_LOST_INTERVAL));
+ try {
+ setHeartbeatLostInterval(Integer.parseInt(hbLostInterval));
+ } catch (NumberFormatException e) {
+ log.warn("Bad value {} for {}. Defaulting to {}",
+ hbLostInterval, HEARTBEAT_LOST_INTERVAL,
+ DEFAULT_HEARTBEAT_LOST_INTERVAL);
+ setHeartbeatLostInterval(DEFAULT_HEARTBEAT_LOST_INTERVAL); // make the logged fallback actually take effect
+ }
}
/**
@@ -1637,6 +1653,11 @@
this.heartbeatMonitorInterval = heartbeatMonitorInterval;
}
+ @VisibleForTesting
+ protected void setHeartbeatLostInterval(int heartbeatLostInterval) { // stores the value as given; no range check is applied here
+ this.heartbeatLostInterval = heartbeatLostInterval;
+ }
+
public void setInUpgradeMode(boolean inUpgradeMode) {
this.isInUpgradeMode = inUpgradeMode;
}
@@ -1692,6 +1713,10 @@
return this.heartbeatMonitorInterval;
}
+ private int getHeartbeatLostInterval() { // current heartbeat-lost interval; handed to HeartbeatMonitor when the monitor is created
+ return this.heartbeatLostInterval;
+ }
+
private String getClusterName() {
if (SliderUtils.isUnset(clusterName)) {
clusterName = getAmState().getInternalsSnapshot().get(OptionKeys.APPLICATION_NAME);
diff --git a/slider-core/src/main/java/org/apache/slider/providers/agent/HeartbeatMonitor.java b/slider-core/src/main/java/org/apache/slider/providers/agent/HeartbeatMonitor.java
index 4293916..80aea2d 100644
--- a/slider-core/src/main/java/org/apache/slider/providers/agent/HeartbeatMonitor.java
+++ b/slider-core/src/main/java/org/apache/slider/providers/agent/HeartbeatMonitor.java
@@ -25,18 +25,27 @@
import java.util.Map;
+import static org.apache.slider.providers.agent.AgentKeys.DEFAULT_HEARTBEAT_LOST_INTERVAL;
+
/** Monitors the container state and heartbeats. */
public class HeartbeatMonitor implements Runnable {
protected static final Logger log =
LoggerFactory.getLogger(HeartbeatMonitor.class);
private final int threadWakeupInterval; //1 minute
+ private final int heartbeatLostInterval; // heartbeat age beyond which an UNHEALTHY container is marked HEARTBEAT_LOST; default constant is 2 hours in ms
private final AgentProviderService provider;
private volatile boolean shouldRun = true;
private Thread monitorThread = null;
public HeartbeatMonitor(AgentProviderService provider, int threadWakeupInterval) {
+ this(provider, threadWakeupInterval, DEFAULT_HEARTBEAT_LOST_INTERVAL); // two-arg form falls back to the default lost interval
+ }
+
+ public HeartbeatMonitor(AgentProviderService provider,
+ int threadWakeupInterval, int heartbeatLostInterval) { // heartbeatLostInterval is compared against the time since the last heartbeat
this.provider = provider;
this.threadWakeupInterval = threadWakeupInterval;
+ this.heartbeatLostInterval = heartbeatLostInterval;
}
public void shutdown() {
@@ -105,7 +114,7 @@
timeSinceLastHeartbeat);
break;
case UNHEALTHY:
- if (timeSinceLastHeartbeat > threadWakeupInterval * 2) {
+ if (timeSinceLastHeartbeat > heartbeatLostInterval) {
componentInstanceState.setContainerState(
ContainerState.HEARTBEAT_LOST);
log.warn(
diff --git a/slider-core/src/test/java/org/apache/slider/providers/agent/TestHeartbeatMonitor.java b/slider-core/src/test/java/org/apache/slider/providers/agent/TestHeartbeatMonitor.java
index 7314b72..5b9b538 100644
--- a/slider-core/src/test/java/org/apache/slider/providers/agent/TestHeartbeatMonitor.java
+++ b/slider-core/src/test/java/org/apache/slider/providers/agent/TestHeartbeatMonitor.java
@@ -40,7 +40,7 @@
@Test
public void testRegularHeartbeat() throws Exception {
AgentProviderService provider = createNiceMock(AgentProviderService.class);
- HeartbeatMonitor hbm = new HeartbeatMonitor(provider, 1 * 1000);
+ HeartbeatMonitor hbm = new HeartbeatMonitor(provider, 1 * 1000, 2 * 1000);
Assert.assertFalse(hbm.isAlive());
expect(provider.getComponentStatuses()).andReturn(null).anyTimes();
replay(provider);
@@ -54,7 +54,7 @@
@Test
public void testHeartbeatMonitorWithHealthy() throws Exception {
AgentProviderService provider = createNiceMock(AgentProviderService.class);
- HeartbeatMonitor hbm = new HeartbeatMonitor(provider, 500);
+ HeartbeatMonitor hbm = new HeartbeatMonitor(provider, 500, 2 * 500);
Assert.assertFalse(hbm.isAlive());
Map<String, ComponentInstanceState> statuses = new HashMap<String, ComponentInstanceState>();
ContainerId container1 = new MockContainerId(1);
@@ -101,7 +101,7 @@
HeartbeatMonitor heartbeatMonitor = new HeartbeatMonitor(provider,
- wakeupInterval);
+ wakeupInterval, 2 * wakeupInterval);
Assert.assertFalse(heartbeatMonitor.isAlive());
now += wakeupInterval;
masterState.setState(State.STARTED);