| /* |
| * Copyright 2015, Yahoo! Inc. |
| * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms. |
| */ |
| package com.yahoo.sketches.performance; |
| |
| import static java.lang.Math.abs; |
| import static java.lang.Math.sqrt; |
| |
| import java.util.Arrays; |
| |
| /** |
| * Processes the statistics collected from an array of Stats objects from a trial set |
| * and creates an output row |
| * |
| * @author Lee Rhodes |
| */ |
| public class ProcessStats { |
| private static final char TAB = '\t'; |
| //Quantile fractions computed from the standard normal cumulative distribution. |
| private static final double M2SD = 0.022750131948179; //minus 2 StdDev |
| private static final double M1SD = 0.158655253931457; //minus 1 StdDev |
| private static final double P1SD = 0.841344746068543; //plus 1 StdDev |
| private static final double P2SD = 0.977249868051821; //plus 2 StdDev |
| |
| /** |
| * Process the Stats[] array and place the output row into the dataStr. |
| * @param statsArr the input Stats array |
| * @param uPerTrial the number of uniques per trial for this trial set. |
| * @param lgK log base 2 of configured nominal entries, or k. |
| * @param p the probability sampling rate. 0 < p ≤ 1.0. |
| * @param dataStr The StringBuilder object that is reused for each row of output |
| */ |
| public static void process(Stats[] statsArr, int uPerTrial, int lgK, double p, StringBuilder dataStr) { |
| int k = 1 << lgK; |
| int trials = statsArr.length; |
| Arrays.sort(statsArr, 0, trials); |
| |
| //Computing the quantiles from the sorted array. |
| double min = statsArr[0].re; |
| double qM2SD = statsArr[quantileIndex(M2SD,trials)].re; |
| double qM1SD = statsArr[quantileIndex(M1SD,trials)].re; |
| double q50 = statsArr[quantileIndex(.5,trials)].re; |
| double qP1SD = statsArr[quantileIndex(P1SD,trials)].re; |
| double qP2SD = statsArr[quantileIndex(P2SD,trials)].re; |
| double max = statsArr[trials-1].re; |
| |
| int cntLB2 = 0, cntLB1 = 0, cntUB1 = 0, cntUB2 = 0; |
| // double sumLB2 = 0, sumLB1 = 0, sumUB1 = 0, sumUB2 = 0; |
| double sumEst = 0, sumEstErr = 0, sumSqEstErr = 0; |
| double sumUpdateTimePerU_nS = 0; |
| //Scan the sorted statsArr |
| for (int i=0; i<trials; i++) { |
| Stats stats = statsArr[i]; |
| if (uPerTrial > stats.ub2est) cntUB2++; //should be < 2.275%; under estimate |
| if (uPerTrial > stats.ub1est) cntUB1++; //should be < 15.866%; under estimate |
| if (uPerTrial < stats.lb1est) cntLB1++; //should be < 15.866%; over estimate |
| if (uPerTrial < stats.lb2est) cntLB2++; //should be < 2.275%; over estimate |
| // sumLB2 += stats.lb2est; |
| // sumLB1 += stats.lb1est; |
| // sumUB1 += stats.ub1est; |
| // sumUB2 += stats.ub2est; |
| //divide by uPerTrial to normalize betweeen 0 and 1.0, sum over all trials |
| //Components for the mean and variance of the estimate error |
| sumEst += statsArr[i].estimate; |
| double estErr = statsArr[i].re; |
| sumEstErr += estErr; |
| sumSqEstErr += estErr*estErr; |
| |
| sumUpdateTimePerU_nS += statsArr[i].updateTimePerU_nS; |
| } |
| //normalize counts |
| double fracTgtUB2 = (double)cntUB2/trials; |
| double fracTgtUB1 = (double)cntUB1/trials; |
| double fracTltLB1 = (double)cntLB1/trials; |
| double fracTltLB2 = (double)cntLB2/trials; |
| |
| //Compute the average results over the trial set |
| double meanEst = sumEst/trials; |
| double meanEstErr = sumEstErr/trials; |
| double deltaSqEstErr = abs(sumSqEstErr - (sumEstErr*sumEstErr)/trials); |
| double varEstErr = (trials == 1)? deltaSqEstErr/trials : deltaSqEstErr/(trials-1); |
| double rse = sqrt(varEstErr); |
| //compute theoretical sketch RSE |
| double invKm1 = 1.0/(k-1); |
| double oneMinusKoverN = 1.0 - (double)k/uPerTrial; |
| double thrse = (sumEstErr == 0.0)? 0.0 : sqrt(invKm1 * oneMinusKoverN); |
| //compute Bernoulli RSE |
| double invUperTrial = 1.0/uPerTrial; |
| double varOverN = (p == 1.0)? 0.0 : 1.0/p - 1.0; |
| double prse = (p == 1.0)? 0.0 : sqrt(invUperTrial * varOverN); |
| |
| //Compute average of each of the bounds estimates |
| // double meanLB2est = sumLB2/(uPerTrial*trials) -1; |
| // double meanLB1est = sumLB1/(uPerTrial*trials) -1; |
| // double meanUB1est = sumUB1/(uPerTrial*trials) -1; |
| // double meanUB2est = sumUB2/(uPerTrial*trials) -1; |
| |
| //Speed |
| double meanUpdateTimePerU_nS = sumUpdateTimePerU_nS/trials; |
| |
| //OUTPUT |
| dataStr.setLength(0); |
| dataStr.append(uPerTrial).append(TAB). |
| |
| //Sketch estimates, mean, variance |
| append(meanEst).append(TAB). |
| append(meanEstErr).append(TAB). |
| append(rse).append(TAB). |
| append(thrse).append(TAB). |
| append(prse).append(TAB). |
| |
| //Quantiles measured from the actual distribution of values from all trials. |
| //Because of quantization effects these values will be noisier than the values |
| //computed statistically above. |
| append(min).append(TAB). |
| append(qM2SD).append(TAB). |
| append(qM1SD).append(TAB). |
| append(q50).append(TAB). |
| append(qP1SD).append(TAB). |
| append(qP2SD).append(TAB). |
| append(max).append(TAB). |
| |
| //Fractional Bounds measurements |
| append(fracTltLB2).append(TAB). |
| append(fracTltLB1).append(TAB). |
| append(fracTgtUB1).append(TAB). |
| append(fracTgtUB2).append(TAB). |
| |
| //The bounds estimates are computed mathematically based on the sketch |
| // estimate, the number of valid values in the cache and the value of theta. |
| // Because of this thes values will be relatively smooth from point to point along the |
| // unique value axis. |
| // append(meanLB2est).append(TAB). |
| // append(meanLB1est).append(TAB). |
| // append(meanUB1est).append(TAB). |
| // append(meanUB2est).append(TAB). |
| //Trials |
| append(trials).append(TAB). |
| //Speed |
| append(meanUpdateTimePerU_nS); |
| } |
| |
| /** |
| * Returns a column header row |
| * @return a column header row |
| */ |
| public static String getHeader() { |
| StringBuilder sb = new StringBuilder(); |
| sb. append("InU").append(TAB). |
| //Estimates |
| append("MeanEst").append(TAB). |
| append("MeanErr").append(TAB). |
| append("RSE").append(TAB). |
| append("thRSE").append(TAB). |
| append("pRSE").append(TAB). |
| //Quantiles |
| append("Min").append(TAB). |
| append("QM2SD").append(TAB). |
| append("QM1SD").append(TAB). |
| append("Q50").append(TAB). |
| append("QP1SD").append(TAB). |
| append("QP2SD").append(TAB). |
| append("Max").append(TAB). |
| //Fractional Bounds measurements |
| append("FracTltLB2").append(TAB). |
| append("FracTltLB1").append(TAB). |
| append("FracTgtUB1").append(TAB). |
| append("FracTgtUB2").append(TAB). |
| |
| //Trials |
| append("Trials").append(TAB). |
| //Speed |
| append("nS/u"); |
| return sb.toString(); |
| } |
| |
| /** |
| * Returns the trial index = floor(quantile-fraction, #trials) |
| * @param frac the desired quantile fraction (0.0 - 1.0) |
| * @param trials the number of total trials |
| * @return the trial index |
| */ |
| private static int quantileIndex(double frac, int trials) { |
| int idx1 = (int) Math.floor(frac*trials); |
| return (idx1 >= trials)? trials-1: idx1; |
| } |
| } |