Close AsyncHttpClient on scheduler shutdown.
Convert SlaManager into an AbstractIdleService and explicitly
close the AsyncHttpClient on scheduler shutdown. Otherwise
we run the risk of having a stuck scheduler JVM that is unable
to shut down due to any of the remaining non-daemon HTTP client
threads.
Testing Done:
./gradlew test
**Tested in vagrant:**
Jun 15 20:48:53 aurora aurora-scheduler[8719]: I0615 20:48:53.456 [BlockingDriverJoin, StateMachine] SchedulerLifecycle state machine transition DEAD -> DEAD
Jun 15 20:48:53 aurora aurora-scheduler[8719]: I0615 20:48:53.457 [BlockingDriverJoin, SchedulerLifecycle] Shutdown already invoked, ignoring extra call.
Jun 15 20:48:53 aurora aurora-scheduler[8719]: I0615 20:48:53.458 [TearDownShutdownRegistry STOPPING, StateMachine] storage state machine transition READY -> STOPPED
Jun 15 20:48:53 aurora aurora-scheduler[8719]: I0615 20:48:53.459 [TearDownShutdownRegistry STOPPING, Lifecycle] Shutting down application
Jun 15 20:48:53 aurora aurora-scheduler[8719]: I0615 20:48:53.459 [TearDownShutdownRegistry STOPPING, ShutdownRegistry$ShutdownRegistryImpl] Action controller has already completed, subsequent calls ignored.
Jun 15 20:48:53 aurora aurora-scheduler[8719]: I0615 20:48:53.461 [main, SchedulerMain] Stopping scheduler services.
**Jun 15 20:48:53 aurora aurora-scheduler[8719]: I0615 20:48:53.470 [SlaManager$$EnhancerByGuice$$40d3047 STOPPING, SlaManager] Shutting down SlaManager async http client.**
Jun 15 20:48:53 aurora aurora-scheduler[8719]: I0615 20:48:53.475 [CronLifecycle STOPPING, CronLifecycle] Shutting down Quartz cron scheduler.
...
Jun 15 20:48:56 aurora aurora-scheduler[8719]: I0615 20:48:56.167 [main, SchedulerMain] Application run() exited.
Bugs closed: AURORA-1990
Reviewed at https://reviews.apache.org/r/67613/
diff --git a/src/main/java/org/apache/aurora/scheduler/events/WebhookModule.java b/src/main/java/org/apache/aurora/scheduler/events/WebhookModule.java
index 5ad1251..ac3884c 100644
--- a/src/main/java/org/apache/aurora/scheduler/events/WebhookModule.java
+++ b/src/main/java/org/apache/aurora/scheduler/events/WebhookModule.java
@@ -80,6 +80,7 @@
if (webhookConfig.isPresent()) {
WebhookInfo webhookInfo = parseWebhookConfig(webhookConfig.get());
DefaultAsyncHttpClientConfig config = new DefaultAsyncHttpClientConfig.Builder()
+ .setThreadPoolName("WebHook-AsyncHttpClient")
.setConnectTimeout(webhookInfo.getConnectonTimeoutMsec())
.setHandshakeTimeout(webhookInfo.getConnectonTimeoutMsec())
.setSslSessionTimeout(webhookInfo.getConnectonTimeoutMsec())
diff --git a/src/main/java/org/apache/aurora/scheduler/sla/SlaManager.java b/src/main/java/org/apache/aurora/scheduler/sla/SlaManager.java
index 98bec48..9c5caf4 100644
--- a/src/main/java/org/apache/aurora/scheduler/sla/SlaManager.java
+++ b/src/main/java/org/apache/aurora/scheduler/sla/SlaManager.java
@@ -30,6 +30,7 @@
import com.google.common.cache.CacheLoader;
import com.google.common.cache.LoadingCache;
import com.google.common.collect.ImmutableList;
+import com.google.common.util.concurrent.AbstractIdleService;
import com.google.common.util.concurrent.Striped;
import com.google.gson.Gson;
import com.google.inject.Inject;
@@ -71,8 +72,9 @@
* Provides methods for performing SLA-safe work. It is used for maintenance and job update
* operations to guarantee that a job's SLA requirements are always satisfied.
*/
-public class SlaManager {
+public class SlaManager extends AbstractIdleService {
private static final Logger LOG = LoggerFactory.getLogger(SlaManager.class);
+
@VisibleForTesting
@Qualifier
@Target({ FIELD, PARAMETER, METHOD }) @Retention(RUNTIME)
@@ -434,4 +436,15 @@
}
return true;
}
+
+ @Override
+ protected void startUp() {
+ //no-op
+ }
+
+ @Override
+ protected void shutDown() throws Exception {
+ LOG.info("Shutting down SlaManager async http client.");
+ httpClient.close();
+ }
}
diff --git a/src/main/java/org/apache/aurora/scheduler/sla/SlaModule.java b/src/main/java/org/apache/aurora/scheduler/sla/SlaModule.java
index 07082a9..27bbaa8 100644
--- a/src/main/java/org/apache/aurora/scheduler/sla/SlaModule.java
+++ b/src/main/java/org/apache/aurora/scheduler/sla/SlaModule.java
@@ -131,6 +131,7 @@
SchedulerServicesModule.addSchedulerActiveServiceBinding(binder()).to(SlaUpdater.class);
DefaultAsyncHttpClientConfig config = new DefaultAsyncHttpClientConfig.Builder()
+ .setThreadPoolName("SlaManager-AsyncHttpClient")
.setConnectTimeout(options.slaCoordinatorTimeout.as(Time.MILLISECONDS).intValue())
.setHandshakeTimeout(options.slaCoordinatorTimeout.as(Time.MILLISECONDS).intValue())
.setSslSessionTimeout(options.slaCoordinatorTimeout.as(Time.MILLISECONDS).intValue())
@@ -159,6 +160,7 @@
"SlaManager-%d", LOG));
bind(SlaManager.class).in(javax.inject.Singleton.class);
+ SchedulerServicesModule.addSchedulerActiveServiceBinding(binder()).to(SlaManager.class);
}
// TODO(ksweeney): This should use AbstractScheduledService.