SLING-5280 : change the separate self-check of the HeartbeatHandler to call only checkForLocalClusterViewChange instead of checkForTopologyChange. Reason: the former does not call into the AnnouncementRegistry, which could be blocked in the very same case this self-check is meant to help with
git-svn-id: https://svn.apache.org/repos/asf/sling/trunk@1713491 13f79535-47bb-0310-9956-ffa450edef68
diff --git a/src/main/java/org/apache/sling/discovery/impl/DiscoveryServiceImpl.java b/src/main/java/org/apache/sling/discovery/impl/DiscoveryServiceImpl.java
index 85e5c27..abb3cbc 100644
--- a/src/main/java/org/apache/sling/discovery/impl/DiscoveryServiceImpl.java
+++ b/src/main/java/org/apache/sling/discovery/impl/DiscoveryServiceImpl.java
@@ -56,6 +56,8 @@
import org.apache.sling.discovery.base.commons.BaseDiscoveryService;
import org.apache.sling.discovery.base.commons.ClusterViewService;
import org.apache.sling.discovery.base.commons.DefaultTopologyView;
+import org.apache.sling.discovery.base.commons.UndefinedClusterViewException;
+import org.apache.sling.discovery.base.commons.UndefinedClusterViewException.Reason;
import org.apache.sling.discovery.base.connectors.announcement.AnnouncementRegistry;
import org.apache.sling.discovery.base.connectors.ping.ConnectorRegistry;
import org.apache.sling.discovery.commons.providers.BaseTopologyView;
@@ -64,6 +66,7 @@
import org.apache.sling.discovery.commons.providers.ViewStateManager;
import org.apache.sling.discovery.commons.providers.base.ViewStateManagerFactory;
import org.apache.sling.discovery.commons.providers.spi.ClusterSyncService;
+import org.apache.sling.discovery.commons.providers.spi.LocalClusterView;
import org.apache.sling.discovery.commons.providers.util.PropertyNameHelper;
import org.apache.sling.discovery.commons.providers.util.ResourceHelper;
import org.apache.sling.discovery.impl.cluster.ClusterViewServiceImpl;
@@ -607,6 +610,44 @@
return provider.hashCode();
}
}
+
+ /**
+ * Checks only the local cluster view for changes while holding viewStateManagerLock; does nothing if this service is not yet activated.
+ * Thus e.g. avoids synchronizing with the AnnouncementRegistry. If the local cluster view is undefined, the old view is marked as not current and the viewStateManager is told that the view is changing.
+ **/
+ public void checkForLocalClusterViewChange() {
+ viewStateManagerLock.lock();
+ try{
+ if (!activated) {
+ logger.debug("checkForLocalClusterViewChange: not yet activated, ignoring");
+ return;
+ }
+ try {
+ ClusterViewService clusterViewService = getClusterViewService();
+ if (clusterViewService == null) {
+ throw new UndefinedClusterViewException(
+ Reason.REPOSITORY_EXCEPTION,
+ "no ClusterViewService available at the moment");
+ }
+ LocalClusterView localClusterView = clusterViewService.getLocalClusterView(); // NOTE(review): result is never read; presumably called only so that an UndefinedClusterViewException surfaces - confirm
+ } catch (UndefinedClusterViewException e) {
+ // SLING-5030 : when we're cut off from the local cluster we also
+ // treat it as being cut off from the entire topology, ie we don't
+ // touch the announcements here but just mark the previous oldView
+ // as !current and let the viewStateManager handle the 'changing' state
+ logger.info("checkForLocalClusterViewChange: undefined cluster view: "+e.getReason()+"] "+e); // NOTE(review): the "] " in this message has no matching "["
+ getOldView().setNotCurrent();
+ viewStateManager.handleChanging();
+ if (e.getReason()==Reason.ISOLATED_FROM_TOPOLOGY) {
+ handleIsolatedFromTopology();
+ }
+ }
+ } finally {
+ if (viewStateManagerLock!=null) { // NOTE(review): lock() above already dereferenced this field, so the check only matters if the field can be reassigned meanwhile - TODO confirm
+ viewStateManagerLock.unlock();
+ }
+ }
+ }
/**
* Check the current topology for any potential change
diff --git a/src/main/java/org/apache/sling/discovery/impl/common/heartbeat/HeartbeatHandler.java b/src/main/java/org/apache/sling/discovery/impl/common/heartbeat/HeartbeatHandler.java
index 532f17f..1177f49 100644
--- a/src/main/java/org/apache/sling/discovery/impl/common/heartbeat/HeartbeatHandler.java
+++ b/src/main/java/org/apache/sling/discovery/impl/common/heartbeat/HeartbeatHandler.java
@@ -243,7 +243,7 @@
logger.warn("initialize: Repeat interval cannot be zero. Defaulting to 10sec.");
interval = 10;
}
- periodicCheckJob = new PeriodicBackgroundJob(interval, NAME+".checkForTopologyChange", new Runnable() {
+ periodicCheckJob = new PeriodicBackgroundJob(interval, NAME+".checkForLocalClusterViewChange", new Runnable() {
@Override
public void run() {
@@ -258,7 +258,7 @@
final long heartbeatIntervalMillis = config.getHeartbeatInterval() * 1000;
final long maxTimeSinceHb = heartbeatTimeoutMillis - 2 * heartbeatIntervalMillis;
if (timeSinceHb > maxTimeSinceHb) {
- logger.info("checkForTopologyChange/.run: time since local instance last wrote a heartbeat is " + timeSinceHb + "ms"
+ logger.info("checkForLocalClusterViewChange/.run: time since local instance last wrote a heartbeat is " + timeSinceHb + "ms"
+ " (heartbeatTimeoutMillis=" + heartbeatTimeoutMillis + ", heartbeatIntervalMillis=" + heartbeatIntervalMillis
+ " => maxTimeSinceHb=" + maxTimeSinceHb + "). Flagging us as (still) changing");
// mark the current establishedView as faulty
@@ -272,11 +272,11 @@
return;
}
}
- // SLING-5195: guarantee frequent calls to checkForTopologyChange,
+ // SLING-5195: guarantee frequent calls to checkForLocalClusterViewChange,
// independently of blocked write/save operations
- logger.debug("checkForTopologyChange/.run: going to check for topology change...");
- discoveryService.checkForTopologyChange();
- logger.debug("checkForTopologyChange/.run: check for topology change done.");
+ logger.debug("checkForLocalClusterViewChange/.run: going to check for topology change...");
+ discoveryService.checkForLocalClusterViewChange();
+ logger.debug("checkForLocalClusterViewChange/.run: check for topology change done.");
}
});