blob: d9757287afaabebbd56e2e4ae36f05c1ce6b53f2 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.datasketches.characterization.uniquecount;
import static org.apache.datasketches.GaussianRanks.GAUSSIANS_3SD;
import static org.apache.datasketches.common.Util.milliSecToString;
import static org.apache.datasketches.common.Util.pwr2SeriesNext;
import java.io.PrintWriter;
import org.apache.datasketches.Job;
import org.apache.datasketches.JobProfile;
import org.apache.datasketches.Properties;
import org.apache.datasketches.characterization.BoundsAccuracyStats;
import org.apache.datasketches.quantiles.DoublesSketch;
/**
* @author Lee Rhodes
*/
public abstract class BaseBoundsAccuracyProfile implements JobProfile {
Job job;
PrintWriter pw;
public Properties prop;
public long vIn = 0;
int lgMinT;
int lgMaxT;
int tPPO;
int lgMinU;
int lgMaxU;
int uPPO;
int lgQK;
public int lgK;
boolean interData;
boolean postPMFs;
public boolean getSize = false;
public BoundsAccuracyStats[] qArr;
//JobProfile
@Override
public void start(final Job job) {
this.job = job;
pw = job.getPrintWriter();
prop = job.getProperties();
lgMinT = Integer.parseInt(prop.mustGet("Trials_lgMinT"));
lgMaxT = Integer.parseInt(prop.mustGet("Trials_lgMaxT"));
tPPO = Integer.parseInt(prop.mustGet("Trials_TPPO"));
lgMinU = Integer.parseInt(prop.mustGet("Trials_lgMinU"));
lgMaxU = Integer.parseInt(prop.mustGet("Trials_lgMaxU"));
interData = Boolean.parseBoolean(prop.mustGet("Trials_interData"));
postPMFs = Boolean.parseBoolean(prop.mustGet("Trials_postPMFs"));
uPPO = Integer.parseInt(prop.mustGet("Trials_UPPO"));
lgQK = Integer.parseInt(prop.mustGet("Trials_lgQK"));
qArr = BoundsAccuracyStats.buildAccuracyStatsArray(lgMinU, lgMaxU, uPPO, lgQK);
lgK = Integer.parseInt(prop.mustGet("LgK"));
configure();
doTrials();
shutdown();
cleanup();
}
@Override
public void shutdown() {}
@Override
public void cleanup() {}
//end JobProfile
public abstract void configure();
/**
* An accuracy trial is one pass through all uniques, pausing to store the estimate into a
* quantiles sketch at each point along the unique axis.
*/
public abstract void doTrial();
/**
* Manages multiple trials for measuring accuracy.
*
* <p>An accuracy trial is run along the count axis (X-axis) first. The "points" along the X-axis
* where accuracy data is collected is controlled by the data loaded into the CountAccuracyStats
* array. A single trial consists of a single sketch being updated with the Trials_lgMaxU unique
* values, stopping at the configured x-axis points along the way where the accuracy is recorded
* into the corresponding stats array. Each stats array retains the distribution of
* the accuracies measured for all the trials at that x-axis point.
*
* <p>Because accuracy trials take a long time, this profile will output intermediate
* accuracy results starting after Trials_lgMinT trials and then again at trial intervals
* determined by Trials_TPPO until Trials_lgMaxT. This allows you to stop the testing at
* any intermediate trials point if you feel you have sufficient trials for the accuracy you
* need.
*/
private void doTrials() {
final int minT = 1 << lgMinT;
final int maxT = 1 << lgMaxT;
final int maxU = 1 << lgMaxU;
//This will generate a table of data up for each intermediate Trials point
int lastT = 0;
while (lastT < maxT) {
final int nextT = lastT == 0 ? minT : (int)pwr2SeriesNext(tPPO, lastT);
final int delta = nextT - lastT;
for (int i = 0; i < delta; i++) {
doTrial();
}
lastT = nextT;
final StringBuilder sb = new StringBuilder();
if (nextT < maxT) { // intermediate
if (interData) {
job.println(getHeader());
process(qArr, lastT, sb);
job.println(sb.toString());
}
} else { //done
job.println(getHeader());
process(qArr, lastT, sb);
job.println(sb.toString());
}
job.println(prop.extractKvPairs());
job.println("Cum Trials : " + lastT);
job.println("Cum Updates : " + vIn);
final long currentTime_mS = System.currentTimeMillis();
final long cumTime_mS = currentTime_mS - job.getStartTime();
job.println("Cum Time : " + milliSecToString(cumTime_mS));
final double timePerTrial_mS = cumTime_mS * 1.0 / lastT;
final double avgUpdateTime_ns = timePerTrial_mS * 1e6 / maxU;
job.println("Time Per Trial, mSec : " + timePerTrial_mS);
job.println("Avg Update Time, nSec : " + avgUpdateTime_ns);
job.println("Date Time : "
+ job.getReadableDateString(currentTime_mS));
final long timeToComplete_mS = (long)(timePerTrial_mS * (maxT - lastT));
job.println("Est Time to Complete : " + milliSecToString(timeToComplete_mS));
job.println("Est Time at Completion : "
+ job.getReadableDateString(timeToComplete_mS + currentTime_mS));
job.println("");
if (postPMFs) {
for (int i = 0; i < qArr.length; i++) {
job.println(outputPMF(qArr[i]));
}
}
job.flush();
}
}
private static void process(final BoundsAccuracyStats[] qArr,
final int cumTrials, final StringBuilder sb) {
final int points = qArr.length;
sb.setLength(0);
for (int pt = 0; pt < points; pt++) {
final BoundsAccuracyStats q = qArr[pt];
final double uniques = q.trueValue;
final double relLb3 = q.sumLB3 / cumTrials / uniques - 1.0;
final double relLb2 = q.sumLB2 / cumTrials / uniques - 1.0;
final double relLb1 = q.sumLB1 / cumTrials / uniques - 1.0;
final double relUb1 = q.sumUB1 / cumTrials / uniques - 1.0;
final double relUb2 = q.sumUB2 / cumTrials / uniques - 1.0;
final double relUb3 = q.sumUB3 / cumTrials / uniques - 1.0;
//OUTPUT
sb.append(uniques).append(TAB);
//TRIALS
sb.append(cumTrials).append(TAB);
//Quantiles
final double[] quants = q.qskEst.getQuantiles(GAUSSIANS_3SD);
final int len = quants.length;
for (int i = 0; i < len; i++) {
sb.append(quants[i] / uniques - 1.0).append(TAB);
}
//Bound averages
sb.append(relLb3).append(TAB);
sb.append(relLb2).append(TAB);
sb.append(relLb1).append(TAB);
sb.append(relUb1).append(TAB);
sb.append(relUb2).append(TAB);
sb.append(relUb3).append(TAB);
sb.append(LS);
}
}
private static String getHeader() {
final StringBuilder sb = new StringBuilder();
sb.append("InU").append(TAB);
//Trials
sb.append("Trials").append(TAB);
//Quantiles
sb.append("Min").append(TAB);
sb.append("Q(.00135)").append(TAB);
sb.append("Q(.02275)").append(TAB);
sb.append("Q(.15866)").append(TAB);
sb.append("Q(.5)").append(TAB);
sb.append("Q(.84134)").append(TAB);
sb.append("Q(.97725)").append(TAB);
sb.append("Q(.99865)").append(TAB);
sb.append("Max").append(TAB);
sb.append("avgLB3").append(TAB);
sb.append("avgLB2").append(TAB);
sb.append("avgLB1").append(TAB);
sb.append("avgUB1").append(TAB);
sb.append("avgUB2").append(TAB);
sb.append("avgUB3").append(TAB);
return sb.toString();
}
/**
* Outputs the Probability Mass Function given the AccuracyStats.
* @param q the given AccuracyStats
*/
private static String outputPMF(final BoundsAccuracyStats q) {
final DoublesSketch qSk = q.qskEst;
final double[] splitPoints = qSk.getQuantiles(GAUSSIANS_3SD); //1:1
final double[] reducedSp = reduceSplitPoints(splitPoints);
final double[] pmfArr = qSk.getPMF(reducedSp); //pmfArr is one larger
final long trials = qSk.getN();
final StringBuilder sb = new StringBuilder();
//output Histogram
final String hdr = String.format("%10s%4s%12s", "Trials", " ", "Est");
final String fmt = "%10d%4s%12.2f";
sb.append("Histogram At " + q.trueValue).append(LS);
sb.append(hdr).append(LS);
for (int i = 0; i < reducedSp.length; i++) {
final int hits = (int)(pmfArr[i + 1] * trials);
final double est = reducedSp[i];
final String line = String.format(fmt, hits, " >= ", est);
sb.append(line).append(LS);
}
return sb.toString();
}
/**
*
* @param splitPoints the given splitPoints
* @return the reduced array of splitPoints
*/
private static double[] reduceSplitPoints(final double[] splitPoints) {
int num = 1;
double lastV = splitPoints[0];
for (int i = 0; i < splitPoints.length; i++) {
final double v = splitPoints[i];
if (v <= lastV) { continue; }
num++;
lastV = v;
}
lastV = splitPoints[0];
int idx = 0;
final double[] sp = new double[num];
sp[0] = lastV;
for (int i = 0; i < splitPoints.length; i++) {
final double v = splitPoints[i];
if (v <= lastV) { continue; }
sp[++idx] = v;
lastV = v;
}
return sp;
}
}