| /* |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| package org.apache.cassandra.utils; |
| |
| import java.io.IOException; |
| import java.util.Arrays; |
| import java.util.concurrent.atomic.AtomicLongArray; |
| |
| import com.google.common.base.Objects; |
| |
| import org.apache.cassandra.db.TypeSizes; |
| import org.apache.cassandra.io.ISerializer; |
| import org.apache.cassandra.io.util.DataInputPlus; |
| import org.apache.cassandra.io.util.DataOutputPlus; |
| import org.slf4j.Logger; |
| |
| public class EstimatedHistogram |
| { |
| public static final EstimatedHistogramSerializer serializer = new EstimatedHistogramSerializer(); |
| |
| /** |
| * The series of values to which the counts in `buckets` correspond: |
| * 1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 14, 17, 20, etc. |
| * Thus, a `buckets` of [0, 0, 1, 10] would mean we had seen one value of 3 and 10 values of 4. |
| * |
| * The series starts at 1 and grows by 1.2 each time (rounding and removing duplicates). It goes from 1 |
| * to around 36M by default (creating 90+1 buckets), which will give us timing resolution from microseconds to |
| * 36 seconds, with less precision as the numbers get larger. |
| * |
| * Each bucket represents values from (previous bucket offset, current offset]. |
| */ |
| private final long[] bucketOffsets; |
| |
| // buckets is one element longer than bucketOffsets -- the last element is values greater than the last offset |
| final AtomicLongArray buckets; |
| |
| public EstimatedHistogram() |
| { |
| this(90); |
| } |
| |
| public EstimatedHistogram(int bucketCount) |
| { |
| this(bucketCount, false); |
| } |
| |
| public EstimatedHistogram(int bucketCount, boolean considerZeroes) |
| { |
| bucketOffsets = newOffsets(bucketCount, considerZeroes); |
| buckets = new AtomicLongArray(bucketOffsets.length + 1); |
| } |
| |
| /** |
| * Create EstimatedHistogram from only bucket data. |
| * |
| * @param bucketData bucket data |
| */ |
| public EstimatedHistogram(long[] bucketData) |
| { |
| assert bucketData != null && bucketData.length > 0 : "Bucket data must be an array of size more than 0"; |
| bucketOffsets = newOffsets(bucketData.length - 1, false); |
| buckets = new AtomicLongArray(bucketData); |
| } |
| |
| public EstimatedHistogram(long[] offsets, long[] bucketData) |
| { |
| assert bucketData.length == offsets.length +1; |
| bucketOffsets = offsets; |
| buckets = new AtomicLongArray(bucketData); |
| } |
| |
| public static long[] newOffsets(int size, boolean considerZeroes) |
| { |
| long[] result = new long[size + (considerZeroes ? 1 : 0)]; |
| int i = 0; |
| if (considerZeroes) |
| result[i++] = 0; |
| long last = 1; |
| result[i++] = last; |
| for (; i < result.length; i++) |
| { |
| long next = Math.round(last * 1.2); |
| if (next == last) |
| next++; |
| result[i] = next; |
| last = next; |
| } |
| |
| return result; |
| } |
| |
| /** |
| * @return the histogram values corresponding to each bucket index |
| */ |
| public long[] getBucketOffsets() |
| { |
| return bucketOffsets; |
| } |
| |
| /** |
| * Increments the count of the bucket closest to n, rounding UP. |
| * @param n |
| */ |
| public void add(long n) |
| { |
| int index = Arrays.binarySearch(bucketOffsets, n); |
| if (index < 0) |
| { |
| // inexact match, take the first bucket higher than n |
| index = -index - 1; |
| } |
| // else exact match; we're good |
| buckets.incrementAndGet(index); |
| } |
| |
| /** |
| * @return the count in the given bucket |
| */ |
| long get(int bucket) |
| { |
| return buckets.get(bucket); |
| } |
| |
| /** |
| * @param reset zero out buckets afterwards if true |
| * @return a long[] containing the current histogram buckets |
| */ |
| public long[] getBuckets(boolean reset) |
| { |
| final int len = buckets.length(); |
| long[] rv = new long[len]; |
| |
| if (reset) |
| for (int i = 0; i < len; i++) |
| rv[i] = buckets.getAndSet(i, 0L); |
| else |
| for (int i = 0; i < len; i++) |
| rv[i] = buckets.get(i); |
| |
| return rv; |
| } |
| |
| /** |
| * @return the smallest value that could have been added to this histogram |
| */ |
| public long min() |
| { |
| for (int i = 0; i < buckets.length(); i++) |
| { |
| if (buckets.get(i) > 0) |
| return i == 0 ? 0 : 1 + bucketOffsets[i - 1]; |
| } |
| return 0; |
| } |
| |
| /** |
| * @return the largest value that could have been added to this histogram. If the histogram |
| * overflowed, returns Long.MAX_VALUE. |
| */ |
| public long max() |
| { |
| int lastBucket = buckets.length() - 1; |
| if (buckets.get(lastBucket) > 0) |
| return Long.MAX_VALUE; |
| |
| for (int i = lastBucket - 1; i >= 0; i--) |
| { |
| if (buckets.get(i) > 0) |
| return bucketOffsets[i]; |
| } |
| return 0; |
| } |
| |
| /** |
| * @param percentile |
| * @return estimated value at given percentile |
| */ |
| public long percentile(double percentile) |
| { |
| assert percentile >= 0 && percentile <= 1.0; |
| int lastBucket = buckets.length() - 1; |
| if (buckets.get(lastBucket) > 0) |
| throw new IllegalStateException("Unable to compute when histogram overflowed"); |
| |
| long pcount = (long) Math.ceil(count() * percentile); |
| if (pcount == 0) |
| return 0; |
| |
| long elements = 0; |
| for (int i = 0; i < lastBucket; i++) |
| { |
| elements += buckets.get(i); |
| if (elements >= pcount) |
| return bucketOffsets[i]; |
| } |
| return 0; |
| } |
| |
| /** |
| * @return the ceil of mean histogram value (average of bucket offsets, weighted by count) |
| * @throws IllegalStateException if any values were greater than the largest bucket threshold |
| */ |
| public long mean() |
| { |
| return (long) Math.ceil(rawMean()); |
| } |
| |
| /** |
| * @return the mean histogram value (average of bucket offsets, weighted by count) |
| * @throws IllegalStateException if any values were greater than the largest bucket threshold |
| */ |
| public double rawMean() |
| { |
| int lastBucket = buckets.length() - 1; |
| if (buckets.get(lastBucket) > 0) |
| throw new IllegalStateException("Unable to compute ceiling for max when histogram overflowed"); |
| |
| long elements = 0; |
| long sum = 0; |
| for (int i = 0; i < lastBucket; i++) |
| { |
| long bCount = buckets.get(i); |
| elements += bCount; |
| sum += bCount * bucketOffsets[i]; |
| } |
| |
| return (double) sum / elements; |
| } |
| |
| /** |
| * @return the total number of non-zero values |
| */ |
| public long count() |
| { |
| long sum = 0L; |
| for (int i = 0; i < buckets.length(); i++) |
| sum += buckets.get(i); |
| return sum; |
| } |
| |
| /** |
| * @return the largest bucket offset |
| */ |
| public long getLargestBucketOffset() |
| { |
| return bucketOffsets[bucketOffsets.length - 1]; |
| } |
| |
| /** |
| * @return true if this histogram has overflowed -- that is, a value larger than our largest bucket could bound was added |
| */ |
| public boolean isOverflowed() |
| { |
| return buckets.get(buckets.length() - 1) > 0; |
| } |
| |
| /** |
| * log.debug() every record in the histogram |
| * |
| * @param log |
| */ |
| public void log(Logger log) |
| { |
| // only print overflow if there is any |
| int nameCount; |
| if (buckets.get(buckets.length() - 1) == 0) |
| nameCount = buckets.length() - 1; |
| else |
| nameCount = buckets.length(); |
| String[] names = new String[nameCount]; |
| |
| int maxNameLength = 0; |
| for (int i = 0; i < nameCount; i++) |
| { |
| names[i] = nameOfRange(bucketOffsets, i); |
| maxNameLength = Math.max(maxNameLength, names[i].length()); |
| } |
| |
| // emit log records |
| String formatstr = "%" + maxNameLength + "s: %d"; |
| for (int i = 0; i < nameCount; i++) |
| { |
| long count = buckets.get(i); |
| // sort-of-hack to not print empty ranges at the start that are only used to demarcate the |
| // first populated range. for code clarity we don't omit this record from the maxNameLength |
| // calculation, and accept the unnecessary whitespace prefixes that will occasionally occur |
| if (i == 0 && count == 0) |
| continue; |
| log.debug(String.format(formatstr, names[i], count)); |
| } |
| } |
| |
| private static String nameOfRange(long[] bucketOffsets, int index) |
| { |
| StringBuilder sb = new StringBuilder(); |
| appendRange(sb, bucketOffsets, index); |
| return sb.toString(); |
| } |
| |
| private static void appendRange(StringBuilder sb, long[] bucketOffsets, int index) |
| { |
| sb.append("["); |
| if (index == 0) |
| if (bucketOffsets[0] > 0) |
| // by original definition, this histogram is for values greater than zero only; |
| // if values of 0 or less are required, an entry of lb-1 must be inserted at the start |
| sb.append("1"); |
| else |
| sb.append("-Inf"); |
| else |
| sb.append(bucketOffsets[index - 1] + 1); |
| sb.append(".."); |
| if (index == bucketOffsets.length) |
| sb.append("Inf"); |
| else |
| sb.append(bucketOffsets[index]); |
| sb.append("]"); |
| } |
| |
| @Override |
| public boolean equals(Object o) |
| { |
| if (this == o) |
| return true; |
| |
| if (!(o instanceof EstimatedHistogram)) |
| return false; |
| |
| EstimatedHistogram that = (EstimatedHistogram) o; |
| return Arrays.equals(getBucketOffsets(), that.getBucketOffsets()) && |
| Arrays.equals(getBuckets(false), that.getBuckets(false)); |
| } |
| |
| @Override |
| public int hashCode() |
| { |
| return Objects.hashCode(getBucketOffsets(), getBuckets(false)); |
| } |
| |
| public static class EstimatedHistogramSerializer implements ISerializer<EstimatedHistogram> |
| { |
| public void serialize(EstimatedHistogram eh, DataOutputPlus out) throws IOException |
| { |
| long[] offsets = eh.getBucketOffsets(); |
| long[] buckets = eh.getBuckets(false); |
| out.writeInt(buckets.length); |
| for (int i = 0; i < buckets.length; i++) |
| { |
| out.writeLong(offsets[i == 0 ? 0 : i - 1]); |
| out.writeLong(buckets[i]); |
| } |
| } |
| |
| public EstimatedHistogram deserialize(DataInputPlus in) throws IOException |
| { |
| int size = in.readInt(); |
| long[] offsets = new long[size - 1]; |
| long[] buckets = new long[size]; |
| |
| for (int i = 0; i < size; i++) |
| { |
| offsets[i == 0 ? 0 : i - 1] = in.readLong(); |
| buckets[i] = in.readLong(); |
| } |
| return new EstimatedHistogram(offsets, buckets); |
| } |
| |
| public long serializedSize(EstimatedHistogram eh) |
| { |
| int size = 0; |
| |
| long[] offsets = eh.getBucketOffsets(); |
| long[] buckets = eh.getBuckets(false); |
| size += TypeSizes.sizeof(buckets.length); |
| for (int i = 0; i < buckets.length; i++) |
| { |
| size += TypeSizes.sizeof(offsets[i == 0 ? 0 : i - 1]); |
| size += TypeSizes.sizeof(buckets[i]); |
| } |
| return size; |
| } |
| } |
| } |