| /** |
| * |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| package org.apache.hadoop.hbase.regionserver; |
| |
| import static org.apache.hadoop.hbase.regionserver.HeapMemoryManager.BLOCK_CACHE_SIZE_MAX_RANGE_KEY; |
| import static org.apache.hadoop.hbase.regionserver.HeapMemoryManager.BLOCK_CACHE_SIZE_MIN_RANGE_KEY; |
| import static org.apache.hadoop.hbase.HConstants.HFILE_BLOCK_CACHE_SIZE_KEY; |
| import static org.apache.hadoop.hbase.regionserver.HeapMemoryManager.MEMSTORE_SIZE_MAX_RANGE_KEY; |
| import static org.apache.hadoop.hbase.regionserver.HeapMemoryManager.MEMSTORE_SIZE_MIN_RANGE_KEY; |
| |
| import org.apache.yetus.audience.InterfaceAudience; |
| import org.slf4j.Logger; |
| import org.slf4j.LoggerFactory; |
| import org.apache.hadoop.conf.Configuration; |
| import org.apache.hadoop.hbase.HConstants; |
| import org.apache.hadoop.hbase.io.util.MemorySizeUtil; |
| import org.apache.hadoop.hbase.regionserver.HeapMemoryManager.TunerContext; |
| import org.apache.hadoop.hbase.regionserver.HeapMemoryManager.TunerResult; |
| import org.apache.hadoop.hbase.util.RollingStatCalculator; |
| |
| /** |
| * The default implementation for the HeapMemoryTuner. This will do statistical checks on |
| * number of evictions, cache misses and flushes to decide whether there should be changes |
| * in the heap size of memstore/block cache. During each tuner operation tuner takes a step |
| * which can either be INCREASE_BLOCK_CACHE_SIZE (increase block cache size), |
| * INCREASE_MEMSTORE_SIZE (increase memstore size) and by default it is NEUTRAL (no change). |
| * We say block cache is sufficient when there is no block cache eviction at all or major amount of |
| * memory allocated to block cache is empty, similarly we say memory allocated for memstore is |
| * sufficient when there is no memstore flushes because of heap pressure or major amount of |
| * memory allocated to memstore is empty. If both are sufficient we do nothing, if exactly one of |
| * them is found to be sufficient we decrease its size by <i>step</i> and increase the other by |
| * same amount. If none of them is sufficient we do statistical analysis on number of cache misses |
| * and flushes to determine tuner direction. Based on these statistics we decide the tuner |
| * direction. If we are not confident about which step direction to take we do nothing and wait for |
| * next iteration. On expectation we will be tuning for at least 10% tuner calls. The number of |
| * past periods to consider for statistics calculation can be specified in config by |
| * <i>hbase.regionserver.heapmemory.autotuner.lookup.periods</i>. Also these many initial calls to |
| * tuner will be ignored (cache is warming up and we leave the system to reach steady state). |
| * After the tuner takes a step, in next call we insure that last call was indeed helpful and did |
| * not do us any harm. If not then we revert the previous step. The step size is dynamic and it |
| * changes based on current and past few tuning directions and their step sizes. We maintain a |
| * parameter <i>decayingAvgTunerStepSize</i> which is sum of past tuner steps with |
| * sign(positive for increase in memstore and negative for increase in block cache). But rather |
| * than simple sum it is calculated by giving more priority to the recent tuning steps. |
| * When last few tuner steps were NETURAL then we assume we are restarting the tuning process and |
| * step size is updated to maximum allowed size which can be specified in config by |
| * <i>hbase.regionserver.heapmemory.autotuner.step.max</i>. If in a particular tuning operation |
| * the step direction is opposite to what indicated by <i>decayingTunerStepSizeSum</i> |
| * we decrease the step size by half. Step size does not change in other tuning operations. |
| * When step size gets below a certain threshold then the following tuner operations are |
| * considered to be neutral. The minimum step size can be specified in config by |
| * <i>hbase.regionserver.heapmemory.autotuner.step.min</i>. |
| */ |
| @InterfaceAudience.Private |
| class DefaultHeapMemoryTuner implements HeapMemoryTuner { |
| public static final String MAX_STEP_KEY = "hbase.regionserver.heapmemory.autotuner.step.max"; |
| public static final String MIN_STEP_KEY = "hbase.regionserver.heapmemory.autotuner.step.min"; |
| public static final String SUFFICIENT_MEMORY_LEVEL_KEY = |
| "hbase.regionserver.heapmemory.autotuner.sufficient.memory.level"; |
| public static final String LOOKUP_PERIODS_KEY = |
| "hbase.regionserver.heapmemory.autotuner.lookup.periods"; |
| public static final String NUM_PERIODS_TO_IGNORE = |
| "hbase.regionserver.heapmemory.autotuner.ignored.periods"; |
| // Maximum step size that the tuner can take |
| public static final float DEFAULT_MAX_STEP_VALUE = 0.04f; // 4% |
| // Minimum step size that the tuner can take |
| public static final float DEFAULT_MIN_STEP_VALUE = 0.00125f; // 0.125% |
| // If current block cache size or memstore size in use is below this level relative to memory |
| // provided to it then corresponding component will be considered to have sufficient memory |
| public static final float DEFAULT_SUFFICIENT_MEMORY_LEVEL_VALUE = 0.5f; // 50% |
| // Number of tuner periods that will be considered while calculating mean and deviation |
| // If set to zero, all stats will be calculated from the start |
| public static final int DEFAULT_LOOKUP_PERIODS = 60; |
| public static final int DEFAULT_NUM_PERIODS_IGNORED = 60; |
| private static final TunerResult NO_OP_TUNER_RESULT = new TunerResult(false); |
| // If deviation of tuner step size gets below this value then it means past few periods were |
| // NEUTRAL(given that last tuner period was also NEUTRAL). |
| private static final double TUNER_STEP_EPS = 1e-6; |
| |
| private Logger LOG = LoggerFactory.getLogger(DefaultHeapMemoryTuner.class); |
| private TunerResult TUNER_RESULT = new TunerResult(true); |
| private Configuration conf; |
| private float sufficientMemoryLevel = DEFAULT_SUFFICIENT_MEMORY_LEVEL_VALUE; |
| private float maximumStepSize = DEFAULT_MAX_STEP_VALUE; |
| private float minimumStepSize = DEFAULT_MIN_STEP_VALUE; |
| private int tunerLookupPeriods = DEFAULT_LOOKUP_PERIODS; |
| private int numPeriodsToIgnore = DEFAULT_NUM_PERIODS_IGNORED; |
| // Counter to ignore few initial periods while cache is still warming up |
| // Memory tuner will do no operation for the first "tunerLookupPeriods" |
| private int ignoreInitialPeriods = 0; |
| |
| private float globalMemStorePercentMinRange; |
| private float globalMemStorePercentMaxRange; |
| private float blockCachePercentMinRange; |
| private float blockCachePercentMaxRange; |
| |
| private float globalMemStoreLimitLowMarkPercent; |
| |
| // Store statistics about the corresponding parameters for memory tuning |
| private RollingStatCalculator rollingStatsForCacheMisses; |
| private RollingStatCalculator rollingStatsForFlushes; |
| private RollingStatCalculator rollingStatsForEvictions; |
| private RollingStatCalculator rollingStatsForTunerSteps; |
| // Set step size to max value for tuning, this step size will adjust dynamically while tuning |
| private float step = DEFAULT_MAX_STEP_VALUE; |
| private StepDirection prevTuneDirection = StepDirection.NEUTRAL; |
| //positive means memstore's size was increased |
| //It is not just arithmetic sum of past tuner periods. More priority is given to recent |
| //tuning steps. |
| private double decayingTunerStepSizeSum = 0; |
| |
| @Override |
| public TunerResult tune(TunerContext context) { |
| float curMemstoreSize = context.getCurMemStoreSize(); |
| float curBlockCacheSize = context.getCurBlockCacheSize(); |
| addToRollingStats(context); |
| |
| if (ignoreInitialPeriods < numPeriodsToIgnore) { |
| // Ignoring the first few tuner periods |
| ignoreInitialPeriods++; |
| rollingStatsForTunerSteps.insertDataValue(0); |
| return NO_OP_TUNER_RESULT; |
| } |
| StepDirection newTuneDirection = getTuneDirection(context); |
| |
| long blockedFlushCount = context.getBlockedFlushCount(); |
| long unblockedFlushCount = context.getUnblockedFlushCount(); |
| long totalOnheapFlushCount = blockedFlushCount + unblockedFlushCount; |
| boolean offheapMemstore = context.isOffheapMemStore(); |
| float newMemstoreSize; |
| float newBlockCacheSize; |
| |
| // Adjusting step size for tuning to get to steady state or restart from steady state. |
| // Even if the step size was 4% and 32 GB memory size, we will be shifting 1 GB back and forth |
| // per tuner operation and it can affect the performance of cluster so we keep on decreasing |
| // step size until everything settles. |
| if (prevTuneDirection == StepDirection.NEUTRAL |
| && newTuneDirection != StepDirection.NEUTRAL |
| && rollingStatsForTunerSteps.getDeviation() < TUNER_STEP_EPS) { |
| // Restarting the tuning from steady state and setting step size to maximum. |
| // The deviation cannot be that low if last period was neutral and some recent periods were |
| // not neutral. |
| step = maximumStepSize; |
| } else if ((newTuneDirection == StepDirection.INCREASE_MEMSTORE_SIZE |
| && decayingTunerStepSizeSum < 0) || |
| (newTuneDirection == StepDirection.INCREASE_BLOCK_CACHE_SIZE |
| && decayingTunerStepSizeSum > 0)) { |
| // Current step is opposite of past tuner actions so decrease the step size to reach steady |
| // state. |
| if (!offheapMemstore && step != minimumStepSize) { |
| // we leave the step to be at minimumStepSize for offheap memstore |
| step = step / 2.00f; |
| } |
| } |
| if (step < minimumStepSize) { |
| // If step size is too small then we do nothing. |
| LOG.debug("Tuner step size is too low; we will not perform any tuning this time."); |
| step = 0.0f; |
| newTuneDirection = StepDirection.NEUTRAL; |
| } |
| // There are no flushes due to onheap pressure and |
| // we have an offheap memstore and we are in need of more block_cache size. |
| if (totalOnheapFlushCount == 0 && offheapMemstore |
| && newTuneDirection == StepDirection.INCREASE_BLOCK_CACHE_SIZE) { |
| // we are sure that there are flushes only due to offheap pressure |
| // So don't do the memstore decrease equal to the step size. Instead do minimum stepSize |
| // decrease. But even if we have some flushes due to heap then it is better we tune |
| // the existing way. |
| step = minimumStepSize; |
| } |
| // Increase / decrease the memstore / block cache sizes depending on new tuner step. |
| // We don't want to exert immediate pressure on memstore. So, we decrease its size gracefully; |
| // we set a minimum bar in the middle of the total memstore size and the lower limit. |
| float minMemstoreSize = ((globalMemStoreLimitLowMarkPercent + 1) * curMemstoreSize) / 2.00f; |
| |
| switch (newTuneDirection) { |
| case INCREASE_BLOCK_CACHE_SIZE: |
| if (curMemstoreSize - step < minMemstoreSize) { |
| step = curMemstoreSize - minMemstoreSize; |
| } |
| newMemstoreSize = curMemstoreSize - step; |
| newBlockCacheSize = curBlockCacheSize + step; |
| rollingStatsForTunerSteps.insertDataValue(-(int)(step*100000)); |
| decayingTunerStepSizeSum = (decayingTunerStepSizeSum - step)/2.00f; |
| break; |
| case INCREASE_MEMSTORE_SIZE: |
| newBlockCacheSize = curBlockCacheSize - step; |
| newMemstoreSize = curMemstoreSize + step; |
| rollingStatsForTunerSteps.insertDataValue((int)(step*100000)); |
| decayingTunerStepSizeSum = (decayingTunerStepSizeSum + step)/2.00f; |
| break; |
| default: |
| prevTuneDirection = StepDirection.NEUTRAL; |
| rollingStatsForTunerSteps.insertDataValue(0); |
| decayingTunerStepSizeSum = (decayingTunerStepSizeSum)/2.00f; |
| return NO_OP_TUNER_RESULT; |
| } |
| // Check we are within max/min bounds. |
| if (newMemstoreSize > globalMemStorePercentMaxRange) { |
| newMemstoreSize = globalMemStorePercentMaxRange; |
| } else if (newMemstoreSize < globalMemStorePercentMinRange) { |
| newMemstoreSize = globalMemStorePercentMinRange; |
| } |
| if (newBlockCacheSize > blockCachePercentMaxRange) { |
| newBlockCacheSize = blockCachePercentMaxRange; |
| } else if (newBlockCacheSize < blockCachePercentMinRange) { |
| newBlockCacheSize = blockCachePercentMinRange; |
| } |
| TUNER_RESULT.setBlockCacheSize(newBlockCacheSize); |
| TUNER_RESULT.setMemStoreSize(newMemstoreSize); |
| prevTuneDirection = newTuneDirection; |
| return TUNER_RESULT; |
| } |
| |
| /** |
| * Determine best direction of tuning base on given context. |
| * @param context The tuner context. |
| * @return tuning direction. |
| */ |
| private StepDirection getTuneDirection(TunerContext context) { |
| StepDirection newTuneDirection = StepDirection.NEUTRAL; |
| long blockedFlushCount = context.getBlockedFlushCount(); |
| long unblockedFlushCount = context.getUnblockedFlushCount(); |
| long evictCount = context.getEvictCount(); |
| long cacheMissCount = context.getCacheMissCount(); |
| long totalFlushCount = blockedFlushCount + unblockedFlushCount; |
| float curMemstoreSize = context.getCurMemStoreSize(); |
| float curBlockCacheSize = context.getCurBlockCacheSize(); |
| StringBuilder tunerLog = new StringBuilder(); |
| // We can consider memstore or block cache to be sufficient if |
| // we are using only a minor fraction of what have been already provided to it. |
| boolean earlyMemstoreSufficientCheck = totalFlushCount == 0 |
| || context.getCurMemStoreUsed() < curMemstoreSize * sufficientMemoryLevel; |
| boolean earlyBlockCacheSufficientCheck = evictCount == 0 || |
| context.getCurBlockCacheUsed() < curBlockCacheSize * sufficientMemoryLevel; |
| if (earlyMemstoreSufficientCheck && earlyBlockCacheSufficientCheck) { |
| // Both memstore and block cache memory seems to be sufficient. No operation required. |
| newTuneDirection = StepDirection.NEUTRAL; |
| } else if (earlyMemstoreSufficientCheck) { |
| // Increase the block cache size and corresponding decrease in memstore size. |
| newTuneDirection = StepDirection.INCREASE_BLOCK_CACHE_SIZE; |
| } else if (earlyBlockCacheSufficientCheck) { |
| // Increase the memstore size and corresponding decrease in block cache size. |
| newTuneDirection = StepDirection.INCREASE_MEMSTORE_SIZE; |
| } else { |
| // Early checks for sufficient memory failed. Tuning memory based on past statistics. |
| // Boolean indicator to show if we need to revert previous step or not. |
| boolean isReverting = false; |
| switch (prevTuneDirection) { |
| // Here we are using number of evictions rather than cache misses because it is more |
| // strong indicator for deficient cache size. Improving caching is what we |
| // would like to optimize for in steady state. |
| case INCREASE_BLOCK_CACHE_SIZE: |
| if ((double)evictCount > rollingStatsForEvictions.getMean() || |
| (double)totalFlushCount > rollingStatsForFlushes.getMean() + |
| rollingStatsForFlushes.getDeviation()/2.00) { |
| // Reverting previous step as it was not useful. |
| // Tuning failed to decrease evictions or tuning resulted in large number of flushes. |
| newTuneDirection = StepDirection.INCREASE_MEMSTORE_SIZE; |
| tunerLog.append("We will revert previous tuning"); |
| if ((double)evictCount > rollingStatsForEvictions.getMean()) { |
| tunerLog.append(" because we could not decrease evictions sufficiently."); |
| } else { |
| tunerLog.append(" because the number of flushes rose significantly."); |
| } |
| isReverting = true; |
| } |
| break; |
| case INCREASE_MEMSTORE_SIZE: |
| if ((double)totalFlushCount > rollingStatsForFlushes.getMean() || |
| (double)evictCount > rollingStatsForEvictions.getMean() + |
| rollingStatsForEvictions.getDeviation()/2.00) { |
| // Reverting previous step as it was not useful. |
| // Tuning failed to decrease flushes or tuning resulted in large number of evictions. |
| newTuneDirection = StepDirection.INCREASE_BLOCK_CACHE_SIZE; |
| tunerLog.append("We will revert previous tuning"); |
| if ((double)totalFlushCount > rollingStatsForFlushes.getMean()) { |
| tunerLog.append(" because we could not decrease flushes sufficiently."); |
| } else { |
| tunerLog.append(" because number of evictions rose significantly."); |
| } |
| isReverting = true; |
| } |
| break; |
| default: |
| // Last step was neutral, revert doesn't not apply here. |
| break; |
| } |
| // If we are not reverting. We try to tune memory sizes by looking at cache misses / flushes. |
| if (!isReverting){ |
| // mean +- deviation*0.8 is considered to be normal |
| // below it its consider low and above it is considered high. |
| // We can safely assume that the number cache misses, flushes are normally distributed over |
| // past periods and hence on all the above mentioned classes (normal, high and low) |
| // are likely to occur with probability 56%, 22%, 22% respectively. Hence there is at |
| // least ~10% probability that we will not fall in NEUTRAL step. |
| // This optimization solution is feedback based and we revert when we |
| // dont find our steps helpful. Hence we want to do tuning only when we have clear |
| // indications because too many unnecessary tuning may affect the performance of cluster. |
| if ((double)cacheMissCount < rollingStatsForCacheMisses.getMean() - |
| rollingStatsForCacheMisses.getDeviation()*0.80 && |
| (double)totalFlushCount < rollingStatsForFlushes.getMean() - |
| rollingStatsForFlushes.getDeviation()*0.80) { |
| // Everything is fine no tuning required |
| newTuneDirection = StepDirection.NEUTRAL; |
| } else if ((double)cacheMissCount > rollingStatsForCacheMisses.getMean() + |
| rollingStatsForCacheMisses.getDeviation()*0.80 && |
| (double)totalFlushCount < rollingStatsForFlushes.getMean() - |
| rollingStatsForFlushes.getDeviation()*0.80) { |
| // more misses , increasing cache size |
| newTuneDirection = StepDirection.INCREASE_BLOCK_CACHE_SIZE; |
| tunerLog.append( |
| "Going to increase block cache size due to increase in number of cache misses."); |
| } else if ((double)cacheMissCount < rollingStatsForCacheMisses.getMean() - |
| rollingStatsForCacheMisses.getDeviation()*0.80 && |
| (double)totalFlushCount > rollingStatsForFlushes.getMean() + |
| rollingStatsForFlushes.getDeviation()*0.80) { |
| // more flushes , increasing memstore size |
| newTuneDirection = StepDirection.INCREASE_MEMSTORE_SIZE; |
| tunerLog.append("Going to increase memstore size due to increase in number of flushes."); |
| } else if (blockedFlushCount > 0 && prevTuneDirection == StepDirection.NEUTRAL) { |
| // we do not want blocked flushes |
| newTuneDirection = StepDirection.INCREASE_MEMSTORE_SIZE; |
| tunerLog.append("Going to increase memstore size due to" |
| + blockedFlushCount + " blocked flushes."); |
| } else { |
| // Default. Not enough facts to do tuning. |
| tunerLog.append("Going to do nothing because we " |
| + "could not determine best tuning direction"); |
| newTuneDirection = StepDirection.NEUTRAL; |
| } |
| } |
| } |
| if (LOG.isDebugEnabled()) { |
| LOG.debug(tunerLog.toString()); |
| } |
| return newTuneDirection; |
| } |
| |
| /** |
| * Add the given context to the rolling tuner stats. |
| * @param context The tuner context. |
| */ |
| private void addToRollingStats(TunerContext context) { |
| rollingStatsForCacheMisses.insertDataValue(context.getCacheMissCount()); |
| rollingStatsForFlushes |
| .insertDataValue(context.getBlockedFlushCount() + context.getUnblockedFlushCount()); |
| rollingStatsForEvictions.insertDataValue(context.getEvictCount()); |
| } |
| |
| @Override |
| public Configuration getConf() { |
| return this.conf; |
| } |
| |
| @Override |
| public void setConf(Configuration conf) { |
| this.conf = conf; |
| this.maximumStepSize = conf.getFloat(MAX_STEP_KEY, DEFAULT_MAX_STEP_VALUE); |
| this.minimumStepSize = conf.getFloat(MIN_STEP_KEY, DEFAULT_MIN_STEP_VALUE); |
| this.step = this.maximumStepSize; |
| this.sufficientMemoryLevel = conf.getFloat(SUFFICIENT_MEMORY_LEVEL_KEY, |
| DEFAULT_SUFFICIENT_MEMORY_LEVEL_VALUE); |
| this.tunerLookupPeriods = conf.getInt(LOOKUP_PERIODS_KEY, DEFAULT_LOOKUP_PERIODS); |
| this.blockCachePercentMinRange = conf.getFloat(BLOCK_CACHE_SIZE_MIN_RANGE_KEY, |
| conf.getFloat(HFILE_BLOCK_CACHE_SIZE_KEY, HConstants.HFILE_BLOCK_CACHE_SIZE_DEFAULT)); |
| this.blockCachePercentMaxRange = conf.getFloat(BLOCK_CACHE_SIZE_MAX_RANGE_KEY, |
| conf.getFloat(HFILE_BLOCK_CACHE_SIZE_KEY, HConstants.HFILE_BLOCK_CACHE_SIZE_DEFAULT)); |
| this.globalMemStorePercentMinRange = conf.getFloat(MEMSTORE_SIZE_MIN_RANGE_KEY, |
| MemorySizeUtil.getGlobalMemStoreHeapPercent(conf, false)); |
| this.globalMemStorePercentMaxRange = conf.getFloat(MEMSTORE_SIZE_MAX_RANGE_KEY, |
| MemorySizeUtil.getGlobalMemStoreHeapPercent(conf, false)); |
| this.globalMemStoreLimitLowMarkPercent = MemorySizeUtil.getGlobalMemStoreHeapLowerMark(conf, |
| true); |
| // Default value of periods to ignore is number of lookup periods |
| this.numPeriodsToIgnore = conf.getInt(NUM_PERIODS_TO_IGNORE, this.tunerLookupPeriods); |
| this.rollingStatsForCacheMisses = new RollingStatCalculator(this.tunerLookupPeriods); |
| this.rollingStatsForFlushes = new RollingStatCalculator(this.tunerLookupPeriods); |
| this.rollingStatsForEvictions = new RollingStatCalculator(this.tunerLookupPeriods); |
| this.rollingStatsForTunerSteps = new RollingStatCalculator(this.tunerLookupPeriods); |
| } |
| |
| private enum StepDirection{ |
| // block cache size was increased |
| INCREASE_BLOCK_CACHE_SIZE, |
| // memstore size was increased |
| INCREASE_MEMSTORE_SIZE, |
| // no operation was performed |
| NEUTRAL |
| } |
| } |