CW1542: Fix thread leak in RemoteHostEndPoint (#54)
This fixes the following:
- Fixes thread leaks in RemoteHostEndPoint
- Fixes a potential NPE while finding EP for a storage/scope
Unbounded thread growth can be reproduced, with the following findings:
- Every unreachable template would produce 6 new threads (in a single
ScheduledExecutorService instance), spaced 10 seconds apart.
- Every reachable template URL without the template would produce 1 new
thread (and one ScheduledExecutorService instance); it errors out quickly
without causing more thread growth.
- Every valid URL will produce up to 10 threads, as the same ep (endpoint
instance) will be reused to query upload/download (async callback)
progress.
Every RemoteHostEndPoint instance creates its own
ScheduledExecutorService instance, which is why, in the jstack dump, we
see several threads that share the prefix RemoteHostEndPoint-{1..10}
(given that the pool size is defined as 10, it uses suffixes 1-10).
This fixes the discovered thread leakage, with the following notes:
- Instead of a per-instance ScheduledExecutorService, a cached thread
pool is now used; it is declared `static` so that it is reused by all
current and future RemoteHostEndPoint instances.
- It was not clear why we would want to wait once Answers have been
returned from the remote EP, so a scheduled/delayed Runnable is not
required at all for processing answers. ScheduledExecutorService was
therefore not really needed and has been replaced with ExecutorService.
- Another benefit of using a cached pool is that it will shut down
threads that have not been used for 60 seconds, while idle threads get
re-used for future runnable submissions.
- Caveat: the executor service is still unbounded; however, it is only
used for short jobs that check upload/download progress, which fits
this use-case.
- Refactored CmdRunner to not use/reference objects from parent class.
Signed-off-by: Rohit Yadav <rohit.yadav@shapeblue.com>
diff --git a/engine/storage/src/org/apache/cloudstack/storage/RemoteHostEndPoint.java b/engine/storage/src/org/apache/cloudstack/storage/RemoteHostEndPoint.java
index 1f59cc6..f09eff9 100644
--- a/engine/storage/src/org/apache/cloudstack/storage/RemoteHostEndPoint.java
+++ b/engine/storage/src/org/apache/cloudstack/storage/RemoteHostEndPoint.java
@@ -18,17 +18,15 @@
*/
package org.apache.cloudstack.storage;
+import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
-import java.util.concurrent.ScheduledExecutorService;
-import java.util.concurrent.TimeUnit;
import javax.inject.Inject;
-import org.apache.log4j.Logger;
-
import org.apache.cloudstack.engine.subsystem.api.storage.EndPoint;
import org.apache.cloudstack.framework.async.AsyncCompletionCallback;
import org.apache.cloudstack.managed.context.ManagedContextRunnable;
+import org.apache.log4j.Logger;
import com.cloud.agent.AgentManager;
import com.cloud.agent.Listener;
@@ -54,9 +52,11 @@
public class RemoteHostEndPoint implements EndPoint {
private static final Logger s_logger = Logger.getLogger(RemoteHostEndPoint.class);
+
private long hostId;
private String hostAddress;
private String publicAddress;
+
@Inject
AgentManager agentMgr;
@Inject
@@ -65,10 +65,10 @@
protected SecondaryStorageVmDao vmDao;
@Inject
protected HostDao _hostDao;
- private ScheduledExecutorService executor;
+
+ private static ExecutorService executorService = Executors.newCachedThreadPool(new NamedThreadFactory("RemoteHostEndPoint"));
public RemoteHostEndPoint() {
- executor = Executors.newScheduledThreadPool(10, new NamedThreadFactory("RemoteHostEndPoint"));
}
private void configure(Host host) {
@@ -134,17 +134,17 @@
}
private class CmdRunner extends ManagedContextRunnable implements Listener {
- final AsyncCompletionCallback<Answer> callback;
- Answer answer;
+ private final AsyncCompletionCallback<Answer> callback;
+ private Answer answer;
- public CmdRunner(AsyncCompletionCallback<Answer> callback) {
+ CmdRunner(final AsyncCompletionCallback<Answer> callback) {
this.callback = callback;
}
@Override
public boolean processAnswers(long agentId, long seq, Answer[] answers) {
- answer = answers[0];
- executor.schedule(this, 10, TimeUnit.SECONDS);
+ this.answer = answers[0];
+ RemoteHostEndPoint.executorService.submit(this);
return true;
}
@@ -192,7 +192,7 @@
@Override
protected void runInContext() {
- callback.complete(answer);
+ this.callback.complete(this.answer);
}
}
@@ -205,7 +205,7 @@
setId(newHostId);
}
if (s_logger.isDebugEnabled()) {
- s_logger.debug("Sending command " + cmd.toString() + " to host: " + newHostId);
+ s_logger.debug("Sending command " + cmd.toString() + " (async) to host: " + newHostId);
}
agentMgr.send(newHostId, new Commands(cmd), new CmdRunner(callback));
} catch (AgentUnavailableException e) {
diff --git a/engine/storage/src/org/apache/cloudstack/storage/endpoint/DefaultEndPointSelector.java b/engine/storage/src/org/apache/cloudstack/storage/endpoint/DefaultEndPointSelector.java
index 7067b8c..d722a68 100644
--- a/engine/storage/src/org/apache/cloudstack/storage/endpoint/DefaultEndPointSelector.java
+++ b/engine/storage/src/org/apache/cloudstack/storage/endpoint/DefaultEndPointSelector.java
@@ -104,15 +104,17 @@
StringBuilder sbuilder = new StringBuilder();
sbuilder.append(sqlBase);
- if (scope.getScopeType() == ScopeType.HOST) {
- sbuilder.append(" and h.id = ");
- sbuilder.append(scope.getScopeId());
- } else if (scope.getScopeType() == ScopeType.CLUSTER) {
- sbuilder.append(" and h.cluster_id = ");
- sbuilder.append(scope.getScopeId());
- } else if (scope.getScopeType() == ScopeType.ZONE) {
- sbuilder.append(" and h.data_center_id = ");
- sbuilder.append(scope.getScopeId());
+ if (scope != null) {
+ if (scope.getScopeType() == ScopeType.HOST) {
+ sbuilder.append(" and h.id = ");
+ sbuilder.append(scope.getScopeId());
+ } else if (scope.getScopeType() == ScopeType.CLUSTER) {
+ sbuilder.append(" and h.cluster_id = ");
+ sbuilder.append(scope.getScopeId());
+ } else if (scope.getScopeType() == ScopeType.ZONE) {
+ sbuilder.append(" and h.data_center_id = ");
+ sbuilder.append(scope.getScopeId());
+ }
}
// TODO: order by rand() is slow if there are lot of hosts
sbuilder.append(" ORDER by rand() limit 1");