blob: 382714710a2fac7756da14ce033a8afc953bcfab [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations under the License.
*/
package org.apache.felix.hc.generalchecks;
import java.lang.management.ManagementFactory;
import java.lang.management.ThreadInfo;
import java.lang.management.ThreadMXBean;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.felix.hc.annotation.HealthCheckService;
import org.apache.felix.hc.api.FormattingResultLog;
import org.apache.felix.hc.api.HealthCheck;
import org.apache.felix.hc.api.Result;
import org.apache.felix.hc.api.ResultLog;
import org.osgi.service.component.annotations.Activate;
import org.osgi.service.component.annotations.Component;
import org.osgi.service.component.annotations.ConfigurationPolicy;
import org.osgi.service.metatype.annotations.AttributeDefinition;
import org.osgi.service.metatype.annotations.Designate;
import org.osgi.service.metatype.annotations.ObjectClassDefinition;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Health check that samples the CPU time of every live thread over a configurable
 * period, reports the busiest threads and the total CPU usage, and flags deadlocked
 * threads.
 *
 * <p>The overall result is WARN when the summed per-thread CPU percentage exceeds
 * {@code cpuPercentageThresholdWarn * availableProcessors}; every deadlocked thread
 * adds a CRITICAL entry.</p>
 */
@HealthCheckService(name = ThreadUsageCheck.HC_NAME)
@Component(configurationPolicy = ConfigurationPolicy.REQUIRE)
@Designate(ocd = ThreadUsageCheck.Config.class, factory = false)
public class ThreadUsageCheck implements HealthCheck {

    private static final Logger LOG = LoggerFactory.getLogger(ThreadUsageCheck.class);

    /** Default name of this health check. */
    public static final String HC_NAME = "Thread Usage";

    /** Label used for the metatype object class definition. */
    public static final String HC_LABEL = "Health Check: " + HC_NAME;

    /** OSGi metatype configuration of this health check. */
    @ObjectClassDefinition(name = HC_LABEL, description = "Checks for thread usage and deadlocks")
    public @interface Config {

        @AttributeDefinition(name = "Name", description = "Name of this health check")
        String hc_name() default HC_NAME;

        @AttributeDefinition(name = "Tags", description = "List of tags for this health check, used to select subsets of health checks for execution e.g. by a composite health check.")
        String[] hc_tags() default {};

        @AttributeDefinition(name = "Sample Period", description = "Period to measure usage per thread")
        long samplePeriodInMs() default 200;

        @AttributeDefinition(name = "CPU Time Threshold for WARN in %", description = "Will WARN once this threshold is reached in average for all threads. This value is multiplied by number of available cores as reported by Runtime.getRuntime().availableProcessors()")
        long cpuPercentageThresholdWarn() default 95;
    }

    /** Length of the sampling window in milliseconds, taken from the configuration. */
    private long samplePeriodInMs;

    /** Per-core CPU percentage above which the check reports WARN, taken from the configuration. */
    private long cpuPercentageThresholdWarn;

    /**
     * Reads the sampling period and the WARN threshold from the component configuration.
     *
     * @param config the component configuration
     */
    @Activate
    protected final void activate(Config config) {
        this.samplePeriodInMs = config.samplePeriodInMs();
        this.cpuPercentageThresholdWarn = config.cpuPercentageThresholdWarn();
        LOG.debug("Activated thread usage HC samplePeriodInMs={}ms cpuPercentageThresholdWarn={}%", samplePeriodInMs, cpuPercentageThresholdWarn);
    }

    /**
     * Samples thread CPU times, logs the busiest threads, adds an OK/WARN summary entry
     * and finally checks for deadlocks.
     *
     * <p>This call blocks for roughly {@code samplePeriodInMs} because the sampling
     * sleeps between the two measurements. Any exception raised during the analysis is
     * logged and reported as a health check error instead of being propagated.</p>
     *
     * @return the result built from all log entries collected during the check
     */
    @Override
    public Result execute() {
        FormattingResultLog log = new FormattingResultLog();
        log.debug("Checking threads for exceeding {}% CPU time within time period of {}ms", cpuPercentageThresholdWarn, samplePeriodInMs);

        try {
            ThreadMXBean threadMxBean = ManagementFactory.getThreadMXBean();

            List<ThreadTimeInfo> threadTimeInfos = collectThreadTimeInfos(log, threadMxBean);
            // busiest threads first (see ThreadTimeInfo#compareTo)
            Collections.sort(threadTimeInfos);

            float totalCpuTimePercentage = 0;
            for (int i = 0; i < threadTimeInfos.size(); i++) {
                ThreadTimeInfo threadInfo = threadTimeInfos.get(i);
                // CPU time is in nanoseconds, the sample period in milliseconds (1 ms = 1,000,000 ns)
                float cpuTimePercentage = ((float) threadInfo.getCpuTime() / ((float) samplePeriodInMs * 1000000)) * 100f;
                totalCpuTimePercentage += cpuTimePercentage;

                String msg = String.format("%4.1f", cpuTimePercentage) + "% used by thread \"" + threadInfo.name + "\"";
                // usually just shows the 3 busiest threads. For the case more threads take more than 15% CPU time, up to 10 threads are shown.
                // use hcDebug=true to show all threads
                if (i < 3 || (i < 10 && cpuTimePercentage > 15)) {
                    log.info(msg);
                } else {
                    log.debug(msg);
                }
            }

            // the configured threshold is per core, so scale it by the number of cores
            int availableProcessors = Runtime.getRuntime().availableProcessors();
            boolean isHighCpuTime = totalCpuTimePercentage > (cpuPercentageThresholdWarn * availableProcessors);
            Result.Status status = isHighCpuTime ? Result.Status.WARN : Result.Status.OK;
            double cpuTimePerProcessor = totalCpuTimePercentage / availableProcessors;

            String msg = threadTimeInfos.size() + " threads using " + String.format("%.1f", totalCpuTimePercentage) + "% CPU Time ("
                    + String.format("%.1f", cpuTimePerProcessor) + "% per single core having " + availableProcessors + " cores)";
            if (isHighCpuTime) {
                msg += ">" + cpuPercentageThresholdWarn + "% threshold for WARN";
            }
            log.add(new ResultLog.Entry(status, msg));

            checkForDeadlock(log, threadMxBean);
        } catch (Exception e) {
            LOG.error("Could not analyse thread usage " + e, e);
            log.healthCheckError("Could not analyse thread usage", e);
        }

        return new Result(log);
    }

    /**
     * Records the CPU time of every thread known at call time, sleeps for the sample
     * period and records the CPU time of the same threads again.
     *
     * <p>Threads started during the sleep are not part of the result. If the sleep is
     * interrupted, a warning is logged, the interrupt flag of the current thread is
     * restored and the second measurement is taken immediately; callers that relate the
     * values to the full sample period will then under-report usage.</p>
     *
     * @param log the result log that receives a warning if the sleep is interrupted
     * @param threadMxBean the bean used to query thread ids, names and CPU times
     * @return a new, unsorted list with one entry per thread id seen at the first measurement
     */
    List<ThreadTimeInfo> collectThreadTimeInfos(FormattingResultLog log, ThreadMXBean threadMxBean) {
        Map<Long, ThreadTimeInfo> threadTimeInfos = new HashMap<Long, ThreadTimeInfo>();

        long[] allThreadIds = threadMxBean.getAllThreadIds();
        for (long threadId : allThreadIds) {
            ThreadTimeInfo threadTimeInfo = new ThreadTimeInfo();
            threadTimeInfo.start = threadMxBean.getThreadCpuTime(threadId);
            // getThreadInfo yields null for a thread that has terminated in the meantime
            ThreadInfo threadInfo = threadMxBean.getThreadInfo(threadId);
            threadTimeInfo.name = threadInfo != null ? threadInfo.getThreadName() : "Thread id " + threadId + " (name not resolvable)";
            threadTimeInfos.put(threadId, threadTimeInfo);
        }

        try {
            Thread.sleep(samplePeriodInMs);
        } catch (InterruptedException e) {
            log.warn("Could not sleep configured samplePeriodInMs={} to gather thread load", samplePeriodInMs);
            // restore the interrupt flag so the caller can still observe the cancellation request
            Thread.currentThread().interrupt();
        }

        for (long threadId : allThreadIds) {
            ThreadTimeInfo threadTimeInfo = threadTimeInfos.get(threadId);
            if (threadTimeInfo == null) {
                continue;
            }
            threadTimeInfo.stop = threadMxBean.getThreadCpuTime(threadId);
        }

        return new ArrayList<ThreadTimeInfo>(threadTimeInfos.values());
    }

    /**
     * Adds one CRITICAL log entry per deadlocked thread reported by the given bean.
     *
     * @param log the result log that receives the CRITICAL entries
     * @param threadMxBean the bean used to detect deadlocked threads and resolve their names
     */
    void checkForDeadlock(FormattingResultLog log, ThreadMXBean threadMxBean) {
        long[] deadlockedThreadIds = threadMxBean.findDeadlockedThreads();
        if (deadlockedThreadIds == null) {
            // no deadlock detected
            return;
        }
        for (long threadId : deadlockedThreadIds) {
            // guard against a null ThreadInfo the same way collectThreadTimeInfos does
            ThreadInfo threadInfo = threadMxBean.getThreadInfo(threadId);
            String threadName = threadInfo != null ? threadInfo.getThreadName() : "id " + threadId + " (name not resolvable)";
            log.critical("Thread {} is DEADLOCKED", threadName);
        }
    }

    /**
     * CPU time measurement of a single thread, naturally ordered by descending CPU time
     * so that sorting puts the busiest thread first.
     */
    static class ThreadTimeInfo implements Comparable<ThreadTimeInfo> {

        /** CPU time in nanoseconds at the first measurement. */
        long start;

        /** CPU time in nanoseconds at the second measurement. */
        long stop;

        /** Thread name, or a placeholder if the name could not be resolved. */
        String name;

        /**
         * Returns the CPU time consumed between the two measurements.
         *
         * @return {@code stop - start}, or 0 if that difference is negative (for example
         *         when the second measurement returned -1 because the thread had ended)
         */
        long getCpuTime() {
            return Math.max(0L, stop - start);
        }

        /**
         * Orders entries by descending CPU time; a {@code null} argument sorts after this entry.
         *
         * <p>Uses {@link Long#compare(long, long)} rather than casting the difference to
         * {@code int}, which would give a wrong sign or zero once two nanosecond values
         * differ by more than {@link Integer#MAX_VALUE}.</p>
         */
        @Override
        public int compareTo(ThreadTimeInfo otherThreadTimeInfo) {
            if (otherThreadTimeInfo == null) {
                return -1;
            }
            return Long.compare(otherThreadTimeInfo.getCpuTime(), this.getCpuTime());
        }
    }
}