YARN-9821. NM hangs at serviceStop when the ATSv2 backend HBase is down. Contributed by Prabhu Joseph.
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timelineservice-hbase/hadoop-yarn-server-timelineservice-hbase-client/src/main/java/org/apache/hadoop/yarn/server/timelineservice/storage/HBaseTimelineWriterImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timelineservice-hbase/hadoop-yarn-server-timelineservice-hbase-client/src/main/java/org/apache/hadoop/yarn/server/timelineservice/storage/HBaseTimelineWriterImpl.java
index a398feb..dda004d 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timelineservice-hbase/hadoop-yarn-server-timelineservice-hbase-client/src/main/java/org/apache/hadoop/yarn/server/timelineservice/storage/HBaseTimelineWriterImpl.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timelineservice-hbase/hadoop-yarn-server-timelineservice-hbase-client/src/main/java/org/apache/hadoop/yarn/server/timelineservice/storage/HBaseTimelineWriterImpl.java
@@ -630,39 +630,49 @@
*/
@Override
protected void serviceStop() throws Exception {
- if (entityTable != null) {
- LOG.info("closing the entity table");
- // The close API performs flushing and releases any resources held
- entityTable.close();
+ boolean isStorageUp = true;
+ try {
+ storageMonitor.checkStorageIsUp();
+ } catch (IOException e) {
+ LOG.warn("Failed to close the timeline tables as Hbase is down", e);
+ isStorageUp = false;
}
- if (appToFlowTable != null) {
- LOG.info("closing the app_flow table");
- // The close API performs flushing and releases any resources held
- appToFlowTable.close();
- }
- if (applicationTable != null) {
- LOG.info("closing the application table");
- applicationTable.close();
- }
- if (flowRunTable != null) {
- LOG.info("closing the flow run table");
- // The close API performs flushing and releases any resources held
- flowRunTable.close();
- }
- if (flowActivityTable != null) {
- LOG.info("closing the flowActivityTable table");
- // The close API performs flushing and releases any resources held
- flowActivityTable.close();
- }
- if (subApplicationTable != null) {
- subApplicationTable.close();
- }
- if (domainTable != null) {
- domainTable.close();
- }
- if (conn != null) {
- LOG.info("closing the hbase Connection");
- conn.close();
+
+ if (isStorageUp) {
+ if (entityTable != null) {
+ LOG.info("closing the entity table");
+ // The close API performs flushing and releases any resources held
+ entityTable.close();
+ }
+ if (appToFlowTable != null) {
+ LOG.info("closing the app_flow table");
+ // The close API performs flushing and releases any resources held
+ appToFlowTable.close();
+ }
+ if (applicationTable != null) {
+ LOG.info("closing the application table");
+ applicationTable.close();
+ }
+ if (flowRunTable != null) {
+ LOG.info("closing the flow run table");
+ // The close API performs flushing and releases any resources held
+ flowRunTable.close();
+ }
+ if (flowActivityTable != null) {
+ LOG.info("closing the flowActivityTable table");
+ // The close API performs flushing and releases any resources held
+ flowActivityTable.close();
+ }
+ if (subApplicationTable != null) {
+ subApplicationTable.close();
+ }
+ if (domainTable != null) {
+ domainTable.close();
+ }
+ if (conn != null) {
+ LOG.info("closing the hbase Connection");
+ conn.close();
+ }
}
storageMonitor.stop();
super.serviceStop();