blob: e6d8e2aba5ccc742cd728b68960150c884f07018 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.sling.discovery.oak;
import java.lang.management.ManagementFactory;
import java.util.Collection;
import java.util.Set;
import javax.management.MBeanServer;
import javax.management.ObjectName;
import org.apache.sling.discovery.base.connectors.announcement.Announcement;
import org.apache.sling.discovery.base.connectors.announcement.AnnouncementRegistry;
import org.apache.felix.hc.api.HealthCheck;
import org.apache.felix.hc.api.Result;
import org.apache.felix.hc.api.FormattingResultLog;
import org.apache.sling.settings.SlingSettingsService;
import org.osgi.service.component.annotations.Component;
import org.osgi.service.component.annotations.Reference;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * HealthCheck that reports on clock synchronization, both within the local
 * cluster and towards instances connected via topology connectors.
 * <p>
 * The intra-cluster check builds on-top of DocumentNodeStore's
 * determineServerTimeDifferenceMillis method (invoked via JMX) which checks how
 * much the local time differs from the DocumentStore's time. It then applies
 * low- and high-water marks to the absolute value of that time difference:
 * <ul>
 * <li>if the value is equal or higher than the high-water mark (5sec), then it
 * issues a critical</li>
 * <li>if the value is lower than the high-water but equal or higher than the
 * low-water mark (1sec), then it issues only a warn</li>
 * <li>if the value is lower than the low-water mark, then it issues only an
 * info</li>
 * </ul>
 * The inter-cluster check compares, for every local announcement listed by the
 * {@link AnnouncementRegistry}, the time the announcement was originally
 * created with the time it was received, and applies the same scheme with a
 * high-water mark of 10sec and a low-water mark of 5sec.
 */
@Component(
        immediate = true,
        service = HealthCheck.class,
        property = {
                HealthCheck.NAME + "=Synchronized Clocks",
                HealthCheck.MBEAN_NAME + "=slingDiscoveryOakSynchronizedClocks"
        })
public class SynchronizedClocksHealthCheck implements HealthCheck {

    protected final Logger logger = LoggerFactory.getLogger(getClass());

    /** JMX object name pattern used to look up the DocumentNodeStore MBean(s). */
    private static final String DOCUMENT_NODE_STORE_MBEAN = "org.apache.jackrabbit.oak:name=*,type=DocumentNodeStore";

    /** Name of the JMX operation that returns the time difference in milliseconds. */
    private static final String TIME_DIFF_METHOD_NAME = "determineServerTimeDifferenceMillis";

    /** Intra-cluster difference (ms) at or above which a critical is issued. */
    private static final long INTRA_CLUSTER_HIGH_WATER_MARK = 5000;

    /** Intra-cluster difference (ms) at or above which a warn is issued. */
    private static final long INTRA_CLUSTER_LOW_WATER_MARK = 1000;

    /** Inter-cluster difference (ms) at or above which a critical is issued. */
    private static final long INTER_CLUSTER_HIGH_WATER_MARK = 10000;

    /** Inter-cluster difference (ms) at or above which a warn is issued. */
    private static final long INTER_CLUSTER_LOW_WATER_MARK = 5000;

    @Reference
    private AnnouncementRegistry announcementRegistry;

    @Reference
    private SlingSettingsService settingsService;

    /**
     * Runs the intra-cluster check followed by the inter-cluster check and
     * returns the combined result log.
     *
     * @return the result built from all log entries collected by both checks
     */
    @Override
    public Result execute() {
        final FormattingResultLog resultLog = new FormattingResultLog();
        checkIntraClusterClocks(resultLog);
        checkInterClusterClocks(resultLog);
        return new Result(resultLog);
    }

    /**
     * Invokes the time-difference operation on the first DocumentNodeStore MBean
     * found and rates the returned value against the intra-cluster water marks.
     * Any exception is reported as a health check error rather than propagated.
     *
     * @param resultLog the log to which the outcome is appended
     */
    private void checkIntraClusterClocks(final FormattingResultLog resultLog) {
        resultLog.debug("Checking cluster internal clocks");
        try {
            final MBeanServer jmxServer = ManagementFactory.getPlatformMBeanServer();
            final Set<ObjectName> names = jmxServer.queryNames(new ObjectName(DOCUMENT_NODE_STORE_MBEAN), null);
            if (names.isEmpty()) {
                resultLog.info("Intra-cluster test n/a (No DocumentNodeStore MBean found)");
                return;
            }

            // the name pattern may match several MBeans - only the first one is consulted
            final ObjectName firstName = names.iterator().next();
            final Object value = jmxServer.invoke(firstName, TIME_DIFF_METHOD_NAME, new Object[0], new String[0]);
            logger.debug("{}, JMX method {} returns {}", new Object[] { firstName, TIME_DIFF_METHOD_NAME, value });
            resultLog.debug("{}, JMX method {} returns {}", firstName, TIME_DIFF_METHOD_NAME, value);

            if (!(value instanceof Long)) {
                // nothing to rate - leave a trace instead of skipping silently
                logger.debug("execute: {}, JMX method {} did not return a Long, skipping intra-cluster rating",
                        firstName, TIME_DIFF_METHOD_NAME);
                resultLog.debug("{}, JMX method {} did not return a Long, skipping intra-cluster rating",
                        firstName, TIME_DIFF_METHOD_NAME);
                return;
            }

            // messages report the signed value, thresholds apply to the absolute value
            final long diffMillis = (Long) value;
            final long absDiffMillis = Math.abs(diffMillis);
            if (absDiffMillis >= INTRA_CLUSTER_HIGH_WATER_MARK) {
                logger.warn(
                        "execute: clocks in local cluster out of sync by {}ms "
                                + "which is equal or higher than the high-water mark of {}ms.",
                        diffMillis, INTRA_CLUSTER_HIGH_WATER_MARK);
                resultLog.critical(
                        "Clocks heavily out of sync in local cluster: "
                                + "time difference of this VM with DocumentStore server: "
                                + "{}ms is equal or larger than high-water mark of {}ms",
                        diffMillis, INTRA_CLUSTER_HIGH_WATER_MARK);
            } else if (absDiffMillis >= INTRA_CLUSTER_LOW_WATER_MARK) {
                logger.warn(
                        "execute: clocks in local cluster out of sync by {}ms "
                                + "which is equal or higher than the low-water mark of {}ms.",
                        diffMillis, INTRA_CLUSTER_LOW_WATER_MARK);
                resultLog.warn(
                        "Clocks noticeably out of sync in local cluster: "
                                + "time difference of this VM with DocumentStore server: "
                                + "{}ms is equal or larger than low-water mark of {}ms",
                        diffMillis, INTRA_CLUSTER_LOW_WATER_MARK);
            } else {
                logger.debug(
                        "execute: clocks in local cluster in sync. diff is {}ms "
                                + "which is within low-water mark of {}ms.",
                        diffMillis, INTRA_CLUSTER_LOW_WATER_MARK);
                resultLog.info(
                        "Clocks in sync in local cluster: time difference of this VM with DocumentStore server: "
                                + "{}ms is within low-water mark of {}ms",
                        diffMillis, INTRA_CLUSTER_LOW_WATER_MARK);
            }
        } catch (final Exception e) {
            // The exception text fills the last placeholder; the exception itself is
            // passed as an additional trailing argument so that it is not consumed
            // by a placeholder and stays available to the logger as a throwable.
            logger.warn("execute: {}, JMX method {} invocation failed: {}",
                    new Object[] { DOCUMENT_NODE_STORE_MBEAN, TIME_DIFF_METHOD_NAME, e.toString(), e });
            resultLog.healthCheckError("{}, JMX method {} invocation failed: {}",
                    DOCUMENT_NODE_STORE_MBEAN, TIME_DIFF_METHOD_NAME, e.toString(), e);
        }
    }

    /**
     * Rates, for every local announcement, the absolute difference between the
     * time it was originally created and the time it was received against the
     * inter-cluster water marks.
     *
     * @param resultLog the log to which the outcome is appended
     */
    private void checkInterClusterClocks(final FormattingResultLog resultLog) {
        final String slingId = settingsService == null ? "n/a" : settingsService.getSlingId();
        if (announcementRegistry == null) {
            logger.warn("execute: no announcementRegistry ({}) set", announcementRegistry);
            resultLog.warn("Cannot determine topology clocks since no announcementRegistry ({}) set", announcementRegistry);
            return;
        }

        final Collection<Announcement> localAnnouncements = announcementRegistry.listLocalAnnouncements();
        if (localAnnouncements.isEmpty()) {
            logger.debug("execute: no topology connectors connected to local instance.");
            resultLog.info("No topology connectors connected to local instance.");
        }

        for (final Announcement ann : localAnnouncements) {
            final String peerSlingId = ann.getOwnerId();
            final long diffMillis = Math.abs(ann.getOriginallyCreatedAt() - ann.getReceivedAt());
            if (diffMillis >= INTER_CLUSTER_HIGH_WATER_MARK) {
                logger.warn(
                        "execute: clocks between local instance (slingId: {}) and remote instance (slingId: {}) out of sync by {}ms "
                                + "which is equal or higher than the high-water mark of {}ms.",
                        new Object[] { slingId, peerSlingId, diffMillis, INTER_CLUSTER_HIGH_WATER_MARK });
                resultLog.critical(
                        "Clocks heavily out of sync between local instance (slingId: {}) and remote instance (slingId: {}): "
                                + "by {}ms which is equal or larger than high-water mark of {}ms",
                        new Object[] { slingId, peerSlingId, diffMillis, INTER_CLUSTER_HIGH_WATER_MARK });
            } else if (diffMillis >= INTER_CLUSTER_LOW_WATER_MARK) {
                logger.warn(
                        "execute: clocks out of sync between local instance (slingId: {}) and remote instance (slingId: {}) by {}ms "
                                + "which is equal or higher than the low-water mark of {}ms.",
                        new Object[] { slingId, peerSlingId, diffMillis, INTER_CLUSTER_LOW_WATER_MARK });
                resultLog.warn(
                        "Clocks noticeably out of sync between local instance (slingId: {}) and remote instance (slingId: {}): "
                                + "by {}ms which is equal or larger than low-water mark of {}ms",
                        new Object[] { slingId, peerSlingId, diffMillis, INTER_CLUSTER_LOW_WATER_MARK });
            } else {
                logger.debug(
                        "execute: clocks in sync between local instance (slingId: {}) and remote instance (slingId: {}). "
                                + "diff is {}ms which is within low-water mark of {}ms.",
                        new Object[] { slingId, peerSlingId, diffMillis, INTER_CLUSTER_LOW_WATER_MARK });
                resultLog.info(
                        "Clocks in sync between local instance (slingId: {}) and remote instance (slingId: {}): "
                                + "diff is {}ms which is within low-water mark of {}ms",
                        new Object[] { slingId, peerSlingId, diffMillis, INTER_CLUSTER_LOW_WATER_MARK });
            }
        }
    }
}