YARN-4019. Add JvmPauseMonitor to ResourceManager and NodeManager. Contributed by Robert Kanter.
(cherry picked from commit cfee02b3bdd1117370200c9d8ce216676cff8888)
diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt
index 5cbb1b7..460d602 100644
--- a/hadoop-yarn-project/CHANGES.txt
+++ b/hadoop-yarn-project/CHANGES.txt
@@ -322,6 +322,9 @@
YARN-3961. Expose pending, running and reserved containers of a queue in REST
api and yarn top (adhoot via asuresh)
+ YARN-4019. Add JvmPauseMonitor to ResourceManager and NodeManager. (Robert Kanter
+ via junping_du)
+
OPTIMIZATIONS
YARN-3339. TestDockerContainerExecutor should pull a single image and not
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java
index b8889ee..a06293d 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java
@@ -40,6 +40,7 @@
import org.apache.hadoop.service.CompositeService;
import org.apache.hadoop.util.ExitUtil;
import org.apache.hadoop.util.GenericOptionsParser;
+import org.apache.hadoop.util.JvmPauseMonitor;
import org.apache.hadoop.util.NodeHealthScriptRunner;
import org.apache.hadoop.util.ReflectionUtils;
import org.apache.hadoop.util.ShutdownHookManager;
@@ -83,6 +84,7 @@
private static final Log LOG = LogFactory.getLog(NodeManager.class);
private static long nmStartupTime = System.currentTimeMillis();
protected final NodeManagerMetrics metrics = NodeManagerMetrics.create();
+ private JvmPauseMonitor pauseMonitor;
private ApplicationACLsManager aclsManager;
private NodeHealthCheckerService nodeHealthChecker;
private NodeLabelsProvider nodeLabelsProvider;
@@ -307,13 +309,16 @@
dispatcher.register(ContainerManagerEventType.class, containerManager);
dispatcher.register(NodeManagerEventType.class, this);
addService(dispatcher);
-
+
+ pauseMonitor = new JvmPauseMonitor(conf);
+ metrics.getJvmMetrics().setPauseMonitor(pauseMonitor);
+
DefaultMetricsSystem.initialize("NodeManager");
// StatusUpdater should be added last so that it get started last
// so that we make sure everything is up before registering with RM.
addService(nodeStatusUpdater);
-
+
super.serviceInit(conf);
// TODO add local dirs to del
}
@@ -325,6 +330,7 @@
} catch (IOException e) {
throw new YarnRuntimeException("Failed NodeManager login", e);
}
+ pauseMonitor.start();
super.serviceStart();
}
@@ -336,6 +342,9 @@
try {
super.serviceStop();
DefaultMetricsSystem.shutdown();
+ if (pauseMonitor != null) {
+ pauseMonitor.stop();
+ }
} finally {
// YARN-3641: NM's services stop get failed shouldn't block the
// release of NMLevelDBStore.
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/metrics/NodeManagerMetrics.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/metrics/NodeManagerMetrics.java
index 400f14b..56797d1 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/metrics/NodeManagerMetrics.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/metrics/NodeManagerMetrics.java
@@ -57,17 +57,26 @@
@Metric("Disk utilization % on good log dirs")
MutableGaugeInt goodLogDirsDiskUtilizationPerc;
+ private JvmMetrics jvmMetrics = null;
private long allocatedMB;
private long availableMB;
+ public NodeManagerMetrics(JvmMetrics jvmMetrics) {
+ this.jvmMetrics = jvmMetrics;
+ }
+
public static NodeManagerMetrics create() {
return create(DefaultMetricsSystem.instance());
}
static NodeManagerMetrics create(MetricsSystem ms) {
- JvmMetrics.create("NodeManager", null, ms);
- return ms.register(new NodeManagerMetrics());
+ JvmMetrics jm = JvmMetrics.create("NodeManager", null, ms);
+ return ms.register(new NodeManagerMetrics(jm));
+ }
+
+ public JvmMetrics getJvmMetrics() {
+ return jvmMetrics;
}
// Potential instrumentation interface methods
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceManager.java
index 1b606b4..817565b 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceManager.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceManager.java
@@ -39,6 +39,7 @@
import org.apache.hadoop.service.Service;
import org.apache.hadoop.util.ExitUtil;
import org.apache.hadoop.util.GenericOptionsParser;
+import org.apache.hadoop.util.JvmPauseMonitor;
import org.apache.hadoop.util.ReflectionUtils;
import org.apache.hadoop.util.ShutdownHookManager;
import org.apache.hadoop.util.StringUtils;
@@ -157,6 +158,7 @@
private WebApp webApp;
private AppReportFetcher fetcher = null;
protected ResourceTrackerService resourceTracker;
+ private JvmPauseMonitor pauseMonitor;
@VisibleForTesting
protected String webAppAddress;
@@ -511,7 +513,9 @@
rmContext.setResourceTrackerService(resourceTracker);
DefaultMetricsSystem.initialize("ResourceManager");
- JvmMetrics.initSingleton("ResourceManager", null);
+ JvmMetrics jm = JvmMetrics.initSingleton("ResourceManager", null);
+ pauseMonitor = new JvmPauseMonitor(conf);
+ jm.setPauseMonitor(pauseMonitor);
// Initialize the Reservation system
if (conf.getBoolean(YarnConfiguration.RM_RESERVATION_SYSTEM_ENABLE,
@@ -566,6 +570,8 @@
// need events to move to further states.
rmStore.start();
+ pauseMonitor.start();
+
if(recoveryEnabled) {
try {
LOG.info("Recovery started");
@@ -591,6 +597,9 @@
protected void serviceStop() throws Exception {
DefaultMetricsSystem.shutdown();
+ if (pauseMonitor != null) {
+ pauseMonitor.stop();
+ }
if (rmContext != null) {
RMStateStore store = rmContext.getStateStore();