blob: 0640fc01e2ffdcfa7637d1786b3ebd8a4f43c696 [file] [log] [blame]
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.util;
import com.google.common.base.Preconditions;
import java.lang.management.GarbageCollectorMXBean;
import java.lang.management.ManagementFactory;
import java.util.List;
/**
* This class monitors the percentage of time the JVM is paused in GC within
* the specified observation window, say 1 minute. The user can provide a
* hook which will be called whenever this percentage exceeds the specified
* threshold.
*/
public class GcTimeMonitor extends Thread {
private final long maxGcTimePercentage;
private final long observationWindowMs, sleepIntervalMs;
private final GcTimeAlertHandler alertHandler;
private final List<GarbageCollectorMXBean> gcBeans =
ManagementFactory.getGarbageCollectorMXBeans();
// Ring buffers containing GC timings and timestamps when timings were taken
private final TsAndData[] gcDataBuf;
private int bufSize, startIdx, endIdx;
private long startTime;
private final GcData curData = new GcData();
private volatile boolean shouldRun = true;
/**
* Create an instance of GCTimeMonitor. Once it's started, it will stay alive
* and monitor GC time percentage until shutdown() is called. If you don't
* put a limit on the number of GCTimeMonitor instances that you create, and
* alertHandler != null, you should necessarily call shutdown() once the given
* instance is not needed. Otherwise, you may create a memory leak, because
* each running GCTimeMonitor will keep its alertHandler object in memory,
* which in turn may reference and keep in memory many more other objects.
*
* @param observationWindowMs the interval over which the percentage
* of GC time should be calculated. A practical value would be somewhere
* between 30 sec and several minutes.
* @param sleepIntervalMs how frequently this thread should wake up to check
* GC timings. This is also a frequency with which alertHandler will be
* invoked if GC time percentage exceeds the specified limit. A practical
* value would likely be 500..1000 ms.
* @param maxGcTimePercentage A GC time percentage limit (0..100) within
* observationWindowMs. Once this is exceeded, alertHandler will be
* invoked every sleepIntervalMs milliseconds until GC time percentage
* falls below this limit.
* @param alertHandler a single method in this interface is invoked when GC
* time percentage exceeds the specified limit.
*/
public GcTimeMonitor(long observationWindowMs, long sleepIntervalMs,
int maxGcTimePercentage, GcTimeAlertHandler alertHandler) {
Preconditions.checkArgument(observationWindowMs > 0);
Preconditions.checkArgument(
sleepIntervalMs > 0 && sleepIntervalMs < observationWindowMs);
Preconditions.checkArgument(
maxGcTimePercentage >= 0 && maxGcTimePercentage <= 100);
this.observationWindowMs = observationWindowMs;
this.sleepIntervalMs = sleepIntervalMs;
this.maxGcTimePercentage = maxGcTimePercentage;
this.alertHandler = alertHandler;
bufSize = (int) (observationWindowMs / sleepIntervalMs + 2);
// Prevent the user from accidentally creating an abnormally big buffer,
// which will result in slow calculations and likely inaccuracy.
Preconditions.checkArgument(bufSize <= 128 * 1024);
gcDataBuf = new TsAndData[bufSize];
for (int i = 0; i < bufSize; i++) {
gcDataBuf[i] = new TsAndData();
}
this.setDaemon(true);
this.setName("GcTimeMonitor obsWindow = " + observationWindowMs +
", sleepInterval = " + sleepIntervalMs +
", maxGcTimePerc = " + maxGcTimePercentage);
}
@Override
public void run() {
startTime = System.currentTimeMillis();
curData.timestamp = startTime;
gcDataBuf[startIdx].setValues(startTime, 0);
while (shouldRun) {
try {
Thread.sleep(sleepIntervalMs);
} catch (InterruptedException ie) {
return;
}
calculateGCTimePercentageWithinObservedInterval();
if (alertHandler != null &&
curData.gcTimePercentage > maxGcTimePercentage) {
alertHandler.alert(curData.clone());
}
}
}
public void shutdown() {
shouldRun = false;
}
/** Returns a copy of the most recent data measured by this monitor. */
public GcData getLatestGcData() {
return curData.clone();
}
private void calculateGCTimePercentageWithinObservedInterval() {
long prevTotalGcTime = curData.totalGcTime;
long totalGcTime = 0;
long totalGcCount = 0;
for (GarbageCollectorMXBean gcBean : gcBeans) {
totalGcTime += gcBean.getCollectionTime();
totalGcCount += gcBean.getCollectionCount();
}
long gcTimeWithinSleepInterval = totalGcTime - prevTotalGcTime;
long ts = System.currentTimeMillis();
long gcMonitorRunTime = ts - startTime;
endIdx = (endIdx + 1) % bufSize;
gcDataBuf[endIdx].setValues(ts, gcTimeWithinSleepInterval);
// Move startIdx forward until we reach the first buffer entry with
// timestamp within the observation window.
long startObsWindowTs = ts - observationWindowMs;
while (gcDataBuf[startIdx].ts < startObsWindowTs && startIdx != endIdx) {
startIdx = (startIdx + 1) % bufSize;
}
// Calculate total GC time within observationWindowMs.
// We should be careful about GC time that passed before the first timestamp
// in our observation window.
long gcTimeWithinObservationWindow = Math.min(
gcDataBuf[startIdx].gcPause, gcDataBuf[startIdx].ts - startObsWindowTs);
if (startIdx != endIdx) {
for (int i = (startIdx + 1) % bufSize; i != endIdx;
i = (i + 1) % bufSize) {
gcTimeWithinObservationWindow += gcDataBuf[i].gcPause;
}
}
curData.update(ts, gcMonitorRunTime, totalGcTime, totalGcCount,
(int) (gcTimeWithinObservationWindow * 100 /
Math.min(observationWindowMs, gcMonitorRunTime)));
}
/**
* The user can provide an instance of a class implementing this interface
* when initializing a GcTimeMonitor to receive alerts when GC time
* percentage exceeds the specified threshold.
*/
public interface GcTimeAlertHandler {
void alert(GcData gcData);
}
/** Encapsulates data about GC pauses measured at the specific timestamp. */
public static class GcData implements Cloneable {
private long timestamp;
private long gcMonitorRunTime, totalGcTime, totalGcCount;
private int gcTimePercentage;
/** Returns the absolute timestamp when this measurement was taken. */
public long getTimestamp() {
return timestamp;
}
/** Returns the time since the start of the associated GcTimeMonitor. */
public long getGcMonitorRunTime() {
return gcMonitorRunTime;
}
/** Returns accumulated GC time since this JVM started. */
public long getAccumulatedGcTime() {
return totalGcTime;
}
/** Returns the accumulated number of GC pauses since this JVM started. */
public long getAccumulatedGcCount() {
return totalGcCount;
}
/**
* Returns the percentage (0..100) of time that the JVM spent in GC pauses
* within the observation window of the associated GcTimeMonitor.
*/
public int getGcTimePercentage() {
return gcTimePercentage;
}
private synchronized void update(long inTimestamp, long inGcMonitorRunTime,
long inTotalGcTime, long inTotalGcCount, int inGcTimePercentage) {
this.timestamp = inTimestamp;
this.gcMonitorRunTime = inGcMonitorRunTime;
this.totalGcTime = inTotalGcTime;
this.totalGcCount = inTotalGcCount;
this.gcTimePercentage = inGcTimePercentage;
}
@Override
public synchronized GcData clone() {
try {
return (GcData) super.clone();
} catch (CloneNotSupportedException e) {
throw new RuntimeException(e);
}
}
}
private static class TsAndData {
private long ts; // Timestamp when this measurement was taken
private long gcPause; // Total GC pause time within the interval between ts
// and the timestamp of the previous measurement.
void setValues(long inTs, long inGcPause) {
this.ts = inTs;
this.gcPause = inGcPause;
}
}
}