Moved miscellaneous code from the test tree to the sketches-misc repo.
diff --git a/src/test/java/com/yahoo/sketches/BinomialBoundsNTest.java b/src/test/java/com/yahoo/sketches/BinomialBoundsNTest.java
index 1d1745f..8519398 100644
--- a/src/test/java/com/yahoo/sketches/BinomialBoundsNTest.java
+++ b/src/test/java/com/yahoo/sketches/BinomialBoundsNTest.java
@@ -136,6 +136,22 @@
}
@Test
+ public void boundsExample() { //illustrative only: calls println, makes no assertions
+ println("BinomialBoundsN Example:");
+ int k = 500; //first argument to both bound calls below
+ double theta = 0.001;
+ int stdDev = 2; //presumably the number of standard deviations for the bounds - TODO confirm
+ double ub = BinomialBoundsN.getUpperBound(k, theta, stdDev, false); //NOTE(review): meaning of the trailing 'false' is not visible here
+ double est = k/theta; //int divided by double, so floating-point division
+ double lb = BinomialBoundsN.getLowerBound(k, theta, stdDev, false);
+ println("K="+k+", Theta="+theta+", SD="+stdDev);
+ println("UB: "+ub);
+ println("Est: "+est);
+ println("LB: "+lb);
+ println("");
+ }
+
+ @Test
public void printlnTest() {
println("PRINTING: "+this.getClass().getName());
}
diff --git a/src/test/java/com/yahoo/sketches/benchmark/BenchmarkMain.java b/src/test/java/com/yahoo/sketches/benchmark/BenchmarkMain.java
deleted file mode 100644
index bf7981a..0000000
--- a/src/test/java/com/yahoo/sketches/benchmark/BenchmarkMain.java
+++ /dev/null
@@ -1,160 +0,0 @@
-package com.yahoo.sketches.benchmark;
-
-import com.yahoo.sketches.hll.HllSketch;
-import com.yahoo.sketches.hll.HllSketchBuilder;
-import com.yahoo.sketches.hll.Preamble;
-
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Random;
-
-/**
- */
-public class BenchmarkMain
-{
- @SuppressWarnings("serial")
- public static void main(String[] args)
- {
- final int lgK = 12;
-
- List<SketchBenchmark> benchmarks = new ArrayList<SketchBenchmark>(){{
- this.add(new ThetaMemoryBenchmark(lgK));
- this.add(new ThetaBenchmark(lgK));
-
- HllSketchBuilder sparseBob = HllSketch.builder().setPreamble(Preamble.fromLogK(lgK));
- HllSketchBuilder denseBob = sparseBob.copy().setDenseMode(true);
- this.add(new HllSketchBenchmark("HLL Sketch", new Random(lgK), sparseBob, denseBob));
- this.add(
- new HllSketchBenchmark(
- "HLL Non-Compressed to Compressed",
- new Random(lgK), sparseBob, denseBob.copy().setCompressedDense(true)
- )
- );
- this.add(
- new HllSketchBenchmark(
- "HLL Compressed to Non-Compressed",
- new Random(lgK), sparseBob.copy().setCompressedDense(true), denseBob
- )
- );
- this.add(
- new HllSketchBenchmark(
- "HLL All Compressed",
- new Random(lgK), denseBob.copy().setCompressedDense(true), denseBob.copy().setCompressedDense(true)
- )
- );
- }};
-
- runBenchmarks(benchmarks, 20, 100, powerLawDistribution);
- }
-
- private static void runBenchmarks(
- List<SketchBenchmark> benchmarks,
- int increment,
- int numTimes,
- List<SketchBenchmark.Spec> distribution
- )
- {
- int numSketches = 0;
- for (SketchBenchmark.Spec spec : distribution) {
- numSketches += spec.getNumSketches();
- }
-
- for (SketchBenchmark benchmark : benchmarks) {
- System.out.printf("Starting benchmark[%s]%n", benchmark);
- long start = System.currentTimeMillis();
- benchmark.setup(numSketches, powerLawDistribution);
- System.out.printf("benchmark[%s] setup done in %,d millis.%n", benchmark, System.currentTimeMillis() - start);
- start = System.currentTimeMillis();
- benchmark.runNTimes(increment);
- System.out.printf("benchmark[%s] priming[%s] done in %,d millis.%n", benchmark, increment, System.currentTimeMillis() - start);
- doGC();
-
-
- for (int i = 0; i < numTimes; i+=increment) {
- start = System.currentTimeMillis();
- benchmark.runNTimes(increment);
- long time = System.currentTimeMillis() - start;
- System.out.printf(
- "Benchmark[%s], %,d runs => %,d millis (%,d ms/run), %,d/sec%n",
- benchmark,
- i + increment,
- time,
- (int) (time / (double) increment),
- (int) ((1000 / (time / (double) increment)) * numSketches)
- );
- doGC();
- }
- System.out.printf("Done with benchmark[%s]%n", benchmark);
- }
- }
-
- private static void doGC()
- {
- for (int i = 0; i < 10; ++i) {
- System.gc();
- }
- }
-
-
- @SuppressWarnings("serial")
- public static List<SketchBenchmark.Spec> powerLawDistribution = new ArrayList<SketchBenchmark.Spec>(){{
- this.add(new SketchBenchmark.Spec(0, 44129));
- this.add(new SketchBenchmark.Spec(1, 431561));
- this.add(new SketchBenchmark.Spec(2, 129063));
- this.add(new SketchBenchmark.Spec(3, 64821));
- this.add(new SketchBenchmark.Spec(4, 67522));
- this.add(new SketchBenchmark.Spec(6, 20291));
- this.add(new SketchBenchmark.Spec(7, 15767));
- this.add(new SketchBenchmark.Spec(8, 22975));
- this.add(new SketchBenchmark.Spec(11, 22441));
- this.add(new SketchBenchmark.Spec(14, 14531));
- this.add(new SketchBenchmark.Spec(17, 13472));
- this.add(new SketchBenchmark.Spec(22, 13253));
- this.add(new SketchBenchmark.Spec(28, 9002));
- this.add(new SketchBenchmark.Spec(35, 8406));
- this.add(new SketchBenchmark.Spec(45, 7618));
- this.add(new SketchBenchmark.Spec(57, 6349));
- this.add(new SketchBenchmark.Spec(71, 5194));
- this.add(new SketchBenchmark.Spec(89, 4524));
- this.add(new SketchBenchmark.Spec(112, 4032));
- this.add(new SketchBenchmark.Spec(141, 3397));
- this.add(new SketchBenchmark.Spec(178, 2935));
- this.add(new SketchBenchmark.Spec(224, 2516));
- this.add(new SketchBenchmark.Spec(282, 2118));
- this.add(new SketchBenchmark.Spec(355, 1825));
- this.add(new SketchBenchmark.Spec(447, 1527));
- this.add(new SketchBenchmark.Spec(561, 1269));
- this.add(new SketchBenchmark.Spec(709, 1088));
- this.add(new SketchBenchmark.Spec(890, 900));
- this.add(new SketchBenchmark.Spec(1118, 767));
- this.add(new SketchBenchmark.Spec(1410, 654));
- this.add(new SketchBenchmark.Spec(1776, 550));
- this.add(new SketchBenchmark.Spec(2246, 469));
- this.add(new SketchBenchmark.Spec(2813, 353));
- this.add(new SketchBenchmark.Spec(3552, 325));
- this.add(new SketchBenchmark.Spec(4472, 252));
- this.add(new SketchBenchmark.Spec(5639, 249));
- this.add(new SketchBenchmark.Spec(7022, 187));
- this.add(new SketchBenchmark.Spec(8952, 150));
- this.add(new SketchBenchmark.Spec(11270, 138));
- this.add(new SketchBenchmark.Spec(14198, 106));
- this.add(new SketchBenchmark.Spec(17544, 74));
- this.add(new SketchBenchmark.Spec(22145, 81));
- this.add(new SketchBenchmark.Spec(27848, 50));
- this.add(new SketchBenchmark.Spec(35319, 58));
- this.add(new SketchBenchmark.Spec(44267, 33));
- this.add(new SketchBenchmark.Spec(55292, 22));
- this.add(new SketchBenchmark.Spec(72264, 10));
- this.add(new SketchBenchmark.Spec(88903, 13));
- this.add(new SketchBenchmark.Spec(111538, 12));
- this.add(new SketchBenchmark.Spec(136481, 11));
- this.add(new SketchBenchmark.Spec(178605, 6));
- this.add(new SketchBenchmark.Spec(215707, 5));
- this.add(new SketchBenchmark.Spec(273075, 5));
- this.add(new SketchBenchmark.Spec(362878, 5));
- this.add(new SketchBenchmark.Spec(546015, 1));
- this.add(new SketchBenchmark.Spec(1106004, 2));
- this.add(new SketchBenchmark.Spec(1766259, 2));
- }};
-
-}
diff --git a/src/test/java/com/yahoo/sketches/benchmark/HllSketchBenchmark.java b/src/test/java/com/yahoo/sketches/benchmark/HllSketchBenchmark.java
deleted file mode 100644
index 8945308..0000000
--- a/src/test/java/com/yahoo/sketches/benchmark/HllSketchBenchmark.java
+++ /dev/null
@@ -1,69 +0,0 @@
-package com.yahoo.sketches.benchmark;
-
-import com.yahoo.sketches.hll.HllSketch;
-import com.yahoo.sketches.hll.HllSketchBuilder;
-
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.List;
-import java.util.Random;
-
-/**
- */
-public class HllSketchBenchmark implements SketchBenchmark
-{
- private final String name;
- private final Random rand;
- private final HllSketchBuilder inputBob;
- private final HllSketchBuilder unionBob;
-
- private List<HllSketch> sketches;
-
- public HllSketchBenchmark(String name, Random rand, HllSketchBuilder inputBob, HllSketchBuilder unionBob)
- {
- this.name = name;
- this.rand = rand;
- this.inputBob = inputBob;
- this.unionBob = unionBob;
- }
-
- @Override
- public void setup(int numSketches, List<Spec> specs)
- {
- sketches = new ArrayList<>(numSketches);
-
- for (Spec spec : specs) {
- for (int i = 0; i < spec.getNumSketches(); ++i) {
- HllSketch sketch = inputBob.build();
- for (int j = 0; j < spec.getNumEntries(); ++j) {
- sketch.update(new long[]{rand.nextLong()});
- }
- sketches.add(sketch.asCompact());
- }
- }
- Collections.shuffle(sketches);
- }
-
- @Override
- public void runNTimes(int n)
- {
- for (int i = 0; i < n; ++i) {
- HllSketch combined = unionBob.build();
- for (HllSketch toUnion : sketches) {
- combined.union(toUnion);
- }
- }
- }
-
- @Override
- public void reset()
- {
- sketches = null;
- }
-
- @Override
- public String toString()
- {
- return name;
- }
-}
diff --git a/src/test/java/com/yahoo/sketches/benchmark/SketchBenchmark.java b/src/test/java/com/yahoo/sketches/benchmark/SketchBenchmark.java
deleted file mode 100644
index c59eee5..0000000
--- a/src/test/java/com/yahoo/sketches/benchmark/SketchBenchmark.java
+++ /dev/null
@@ -1,33 +0,0 @@
-package com.yahoo.sketches.benchmark;
-
-import java.util.List;
-
-/**
- */
-public interface SketchBenchmark
-{
- void setup(int numSketches, List<Spec> specs);
- void runNTimes(int n);
- void reset();
-
- class Spec {
- private final int numSketches;
- private final long numEntries;
-
- public Spec(long numEntries, int numSketches) {
-
- this.numSketches = numSketches;
- this.numEntries = numEntries;
- }
-
- public int getNumSketches()
- {
- return numSketches;
- }
-
- public long getNumEntries()
- {
- return numEntries;
- }
- }
-}
diff --git a/src/test/java/com/yahoo/sketches/benchmark/ThetaBenchmark.java b/src/test/java/com/yahoo/sketches/benchmark/ThetaBenchmark.java
deleted file mode 100644
index 183a674..0000000
--- a/src/test/java/com/yahoo/sketches/benchmark/ThetaBenchmark.java
+++ /dev/null
@@ -1,81 +0,0 @@
-package com.yahoo.sketches.benchmark;
-
-import com.yahoo.sketches.theta.CompactSketch;
-import com.yahoo.sketches.theta.SetOperation;
-import com.yahoo.sketches.theta.Sketch;
-import com.yahoo.sketches.theta.Union;
-import com.yahoo.sketches.theta.UpdateSketch;
-
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.List;
-import java.util.Random;
-
-/**
- */
-public class ThetaBenchmark implements SketchBenchmark
-{
- private final int nominalEntries;
- private final Random rand;
-
- private List<CompactSketch> sketches;
-
- public ThetaBenchmark(int lgK) {
- this.nominalEntries = 1 << lgK;
- this.rand = new Random(lgK);
- }
-
- @Override
- public void setup(int numSketches, List<Spec> specs)
- {
- sketches = new ArrayList<>(numSketches);
-
- for (Spec spec : specs) {
- for (int i = 0; i < spec.getNumSketches(); ++i) {
- UpdateSketch sketch = UpdateSketch.builder().build(nominalEntries);
- for (int j = 0; j < spec.getNumEntries(); ++j) {
- sketch.update(rand.nextLong());
- }
-
- sketches.add(sketch.rebuild().compact(true, null));
- }
- }
- Collections.shuffle(sketches, rand);
-
- int numRetained = 0;
- int numEstimating = 0;
- for (CompactSketch sketch : sketches) {
- numRetained += sketch.getRetainedEntries(true);
- if (sketch.isEstimationMode()) {
- ++numEstimating;
- }
- }
- System.out.printf(
- "%,d entries, %,d/sketch, %,d estimating (%.2f%%)%n",
- numRetained, numRetained / sketches.size(), numEstimating, (100 * numEstimating) / (double) sketches.size()
- );
- }
-
- @Override
- public void runNTimes(int n)
- {
- for (int i = 0; i < n; ++i) {
- Union combined = SetOperation.builder().buildUnion(nominalEntries);
- for (Object toUnion : sketches) {
- combined.update((Sketch) toUnion);
- }
- }
- }
-
- @Override
- public void reset()
- {
- sketches = null;
- }
-
- @Override
- public String toString()
- {
- return String.format("Theta OnHeap Benchmark(nominalEntries=%s)", nominalEntries);
- }
-}
diff --git a/src/test/java/com/yahoo/sketches/benchmark/ThetaMemoryBenchmark.java b/src/test/java/com/yahoo/sketches/benchmark/ThetaMemoryBenchmark.java
deleted file mode 100644
index c8f33d7..0000000
--- a/src/test/java/com/yahoo/sketches/benchmark/ThetaMemoryBenchmark.java
+++ /dev/null
@@ -1,84 +0,0 @@
-package com.yahoo.sketches.benchmark;
-
-import com.yahoo.sketches.memory.Memory;
-import com.yahoo.sketches.memory.NativeMemory;
-import com.yahoo.sketches.theta.SetOperation;
-import com.yahoo.sketches.theta.Sketch;
-import com.yahoo.sketches.theta.Union;
-import com.yahoo.sketches.theta.UpdateSketch;
-
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.List;
-import java.util.Random;
-
-/**
- */
-public class ThetaMemoryBenchmark implements SketchBenchmark
-{
- private final int nominalEntries;
- private final Random rand;
- private final byte[] bytes;
-
- private List<Memory> memories;
-
- public ThetaMemoryBenchmark(int lgK) {
- this.nominalEntries = 1 << lgK;
- this.rand = new Random(lgK);
- this.bytes = new byte[Sketch.getMaxUpdateSketchBytes(nominalEntries) + 8];
- }
-
- @Override
- public void setup(int numSketches, List<Spec> specs)
- {
- memories = new ArrayList<>(numSketches);
-
- for (Spec spec : specs) {
- for (int i = 0; i < spec.getNumSketches(); ++i) {
- UpdateSketch sketch = UpdateSketch.builder().build(nominalEntries);
- for (int j = 0; j < spec.getNumEntries(); ++j) {
- sketch.update(rand.nextLong());
- }
- memories.add(new NativeMemory(sketch.rebuild().compact(true, null).toByteArray()));
- }
- }
- Collections.shuffle(memories, rand);
-
- int numRetained = 0;
- int numEstimating = 0;
- for (Memory mem : memories) {
- Sketch sketch = Sketch.wrap(mem);
- numRetained += sketch.getRetainedEntries(true);
- if (sketch.isEstimationMode()) {
- ++numEstimating;
- }
- }
- System.out.printf(
- "%,d entries, %,d/sketch, %,d estimating (%.2f%%)%n",
- numRetained, numRetained / memories.size(), numEstimating, (100 * numEstimating) / (double) memories.size()
- );
- }
-
- @Override
- public void runNTimes(int n)
- {
- for (int i = 0; i < n; ++i) {
- Union combined = SetOperation.builder().initMemory(new NativeMemory(bytes)).buildUnion(nominalEntries);
- for (Memory toUnion : memories) {
- combined.update(toUnion);
- }
- }
- }
-
- @Override
- public void reset()
- {
- memories = null;
- }
-
- @Override
- public String toString()
- {
- return String.format("Theta Memory Benchmark(nominalEntries=%s)", nominalEntries);
- }
-}
diff --git a/src/test/java/com/yahoo/sketches/cmd/CommandLine.java b/src/test/java/com/yahoo/sketches/cmd/CommandLine.java
deleted file mode 100644
index dc28ade..0000000
--- a/src/test/java/com/yahoo/sketches/cmd/CommandLine.java
+++ /dev/null
@@ -1,420 +0,0 @@
-/*
- * Copyright 2016, Yahoo! Inc.
- * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
- */
-
-package com.yahoo.sketches.cmd;
-
-import static com.yahoo.sketches.Util.LS;
-import static com.yahoo.sketches.Util.TAB;
-import static java.lang.Math.*;
-
-import java.io.BufferedReader;
-import java.io.FileInputStream;
-import java.io.IOException;
-import java.io.FileNotFoundException;
-import java.io.InputStreamReader;
-
-import com.yahoo.sketches.theta.Sketches;
-import com.yahoo.sketches.theta.UpdateSketch;
-import com.yahoo.sketches.theta.UpdateSketchBuilder;
-import com.yahoo.sketches.quantiles.QuantilesSketchBuilder;
-import com.yahoo.sketches.quantiles.QuantilesSketch;
-import com.yahoo.sketches.frequencies.FrequentItemsSketch;
-import com.yahoo.sketches.frequencies.FrequentLongsSketch.Row;
-import com.yahoo.sketches.frequencies.ErrorType;
-
-/**
- * Command line access to the basic sketch functions.
- */
-public class CommandLine {
- private static final String BOLD = "\033[1m";
- private static final String OFF = "\033[0m";
-
- public static void main(String[] args) {
- if (args.length == 0) help();
- else parseType(args);
- }
-
- static void parseType(String[] args) {
- String token1 = args[0].toLowerCase();
- switch (token1) {
- case "uniq": parseUniq(args); break;
- case "rank": parseRank(args); break;
- case "hist": parseHist(args); break;
- case "loghist": parseLogHist(args); break;
- case "freq": parseFreq(args); break;
- case "help": help(); break;
- default: {
- printlnErr("Unrecognized TYPE: "+token1);
- help();
- }
- }
- }
-
- private static int parseArgsCase(String[] args) { //we already know type, args[0] is valid
- int len = args.length;
- int ret = 0;
- switch (len) {
- case 1: ret = 1; break; //only type, assume default k, System.in
- case 2: {
- String token2 = args[1]; //2nd arg could be help, k (numeric) or a fileName
- if (token2.equalsIgnoreCase("help")) { ret = 2; break; } //help
- if (!isNumeric(token2)) { ret = 3; break; } //2nd arg not numeric, must be a filename
- ret = 4; //2nd arg must be numeric, assume System.in
- break;
- }
- default: { //3 or more
- String token2 = args[1]; //2nd arg could be help, k (numeric) or a fileName
- if (token2.equalsIgnoreCase("help")) { ret = 2; break; } //help
- if (!isNumeric(token2)) { ret = 3; break; } //2nd arg not numeric, must be a filename
- //2nd arg is numeric, 3rd arg must be filename
- ret = 5;
- break;
- }
- }
- return ret;
- }
-
- private static void parseUniq(String[] args) {
- UpdateSketchBuilder bldr = Sketches.updateSketchBuilder();
- UpdateSketch sketch;
- int argsCase = parseArgsCase(args);
- switch (argsCase) {
- case 1:
- doUniq(getBR(null), bldr.build()); break; //[default k], [System.in]
- case 2:
- uniqHelp(); break; //help
- case 3: //2nd arg not numeric, must be a filename
- doUniq(getBR(args[1]), bldr.build()); break; //[default k], file
- case 4: //2nd arg is numeric, no filename
- sketch = bldr.build(Integer.parseInt(args[1])); //args[1] is numeric = k
- doUniq(getBR(null), sketch); //user k, [System.in]
- break;
- case 5: //3 valid args
- sketch = bldr.build(Integer.parseInt(args[1])); //args[1] is numeric = k
- doUniq(getBR(args[2]), sketch);
- }
- }
-
- private static void doUniq(BufferedReader br, UpdateSketch sketch) {
- String itemStr = "";
- try {
- while ((itemStr = br.readLine()) != null) {
- sketch.update(itemStr);
- }
- } catch (IOException e) {
- printlnErr("Read Error: Item: "+itemStr +", "+br.toString());
- System.exit(1);
- }
- println(sketch.toString());
- }
-
- private static void parseRank(String[] args) {
- QuantilesSketchBuilder bldr = new QuantilesSketchBuilder();
- QuantilesSketch sketch;
- int argsCase = parseArgsCase(args);
- switch (argsCase) {
- case 1:
- doRank(getBR(null), bldr.build()); break; //[default k], [System.in]
- case 2:
- rankHelp(); break; //help
- case 3: //2nd arg not numeric, must be a filename
- doRank(getBR(args[1]), bldr.build()); break; //[default k], file
- case 4: //2nd arg is numeric, no filename
- sketch = bldr.build(Integer.parseInt(args[1])); //args[1] is numeric = k
- doRank(getBR(null), sketch); //user k, [System.in]
- break;
- case 5: //3 valid args
- sketch = bldr.build(Integer.parseInt(args[1])); //args[1] is numeric = k
- doRank(getBR(args[2]), sketch);
- }
- }
-
- private static void doRank(BufferedReader br, QuantilesSketch sketch) {
- String itemStr = "";
- try {
- while ((itemStr = br.readLine()) != null) {
- double item = Double.parseDouble(itemStr);
- sketch.update(item);
- }
- } catch (IOException | NumberFormatException e ) {
- printlnErr("Read Error: Item: "+itemStr +", "+br.toString());
- System.exit(1);
- }
- int ranks = 101;
- double[] valArr = sketch.getQuantiles(ranks);
- println("Rank"+TAB+ "Value");
- for (int i=0; i<ranks; i++) {
- String r = String.format("%.2f",(double)i/ranks);
- println(r + TAB + valArr[i]);
- }
- }
-
- private static void parseHist(String[] args) {
- QuantilesSketchBuilder bldr = new QuantilesSketchBuilder();
- QuantilesSketch sketch;
- int argsCase = parseArgsCase(args);
- switch (argsCase) {
- case 1:
- doHist(getBR(null), bldr.build()); break; //[default k], [System.in]
- case 2:
- histHelp(); break; //help
- case 3: //2nd arg not numeric, must be a filename
- doHist(getBR(args[1]), bldr.build()); break; //[default k], file
- case 4: //2nd arg is numeric, no filename
- sketch = bldr.build(Integer.parseInt(args[1])); //args[1] is numeric = k
- doHist(getBR(null), sketch); //user k, [System.in]
- break;
- case 5: //3 valid args
- sketch = bldr.build(Integer.parseInt(args[1])); //args[1] is numeric = k
- doHist(getBR(args[2]), sketch);
- }
- }
-
- private static void doHist(BufferedReader br, QuantilesSketch sketch) {
- String itemStr = "";
- try {
- while ((itemStr = br.readLine()) != null) {
- double item = Double.parseDouble(itemStr);
- sketch.update(item);
- }
- } catch (IOException | NumberFormatException e ) {
- printlnErr("Read Error: Item: "+itemStr +", "+br.toString());
- System.exit(1);
- }
- int splitPoints = 30;
- long n = sketch.getN();
- double[] splitsArr = getEvenSplits(sketch, splitPoints);
- double[] histArr = sketch.getPMF(splitsArr);
- println("Value"+TAB+ "Freq");
- //int histArrLen = histArr.length; //one larger than splitsArr
- double min = sketch.getMinValue();
- String splitVal = String.format("%,f", min);
- String freqVal = String.format("%,d", (long)(histArr[0] * n));
- println(splitVal+TAB+freqVal);
- for (int i=0; i<splitsArr.length; i++) {
- splitVal = String.format("%,f", splitsArr[i] * n);
- freqVal = String.format("%,d", (long)(histArr[i+1] * n));
- println(splitVal+TAB+freqVal);
- }
- }
-
- private static void parseLogHist(String[] args) {
- QuantilesSketchBuilder bldr = new QuantilesSketchBuilder();
- QuantilesSketch sketch;
- int argsCase = parseArgsCase(args);
- switch (argsCase) {
- case 1:
- doLogHist(getBR(null), bldr.build()); break; //[default k], [System.in]
- case 2:
- logHistHelp(); break; //help
- case 3: //2nd arg not numeric, must be a filename
- doLogHist(getBR(args[1]), bldr.build()); break; //[default k], file
- case 4: //2nd arg is numeric, no filename
- sketch = bldr.build(Integer.parseInt(args[1])); //args[1] is numeric = k
- doLogHist(getBR(null), sketch); //user k, [System.in]
- break;
- case 5: //3 valid args
- sketch = bldr.build(Integer.parseInt(args[1])); //args[1] is numeric = k
- doLogHist(getBR(args[2]), sketch);
- }
- }
-
- private static void doLogHist(BufferedReader br, QuantilesSketch sketch) {
- String itemStr = "";
- try {
- while ((itemStr = br.readLine()) != null) {
- double item = Double.parseDouble(itemStr);
- if (Double.isNaN(item) || (item <= 0.0)) continue;
- sketch.update(item);
- }
- } catch (IOException | NumberFormatException e ) {
- printlnErr("Read Error: Item: "+itemStr +", "+br.toString());
- System.exit(1);
- }
- int splitPoints = 30;
- long n = sketch.getN();
- double[] splitsArr = getLogSplits(sketch, splitPoints);
- double[] histArr = sketch.getPMF(splitsArr);
- println("Value"+TAB+ "Freq");
- //int histArrLen = histArr.length; //one larger than splitsArr
- double min = sketch.getMinValue();
- String splitVal = String.format("%,f", min);
- String freqVal = String.format("%,d", (long)(histArr[0] * n));
- println(splitVal+TAB+freqVal);
- for (int i=0; i<splitsArr.length; i++) {
- splitVal = String.format("%,f", splitsArr[i] * n);
- freqVal = String.format("%,d", (long)(histArr[i+1] * n));
- println(splitVal+TAB+freqVal);
- }
- }
-
- private static void parseFreq(String[] args) {
- FrequentItemsSketch<String> sketch;
- int defaultSize = 1 << 17; //128K
- int argsCase = parseArgsCase(args);
- switch (argsCase) {
- case 1:
- sketch = new FrequentItemsSketch<String>(defaultSize);
- doFreq(getBR(null), sketch); break; //[default k], [System.in]
- case 2:
- freqHelp(); break; //help
- case 3: //2nd arg not numeric, must be a filename
- sketch = new FrequentItemsSketch<String>(defaultSize);
- doFreq(getBR(args[1]), sketch); break; //[default k], file
- case 4: //2nd arg is numeric, no filename
- sketch = new FrequentItemsSketch<String>(Integer.parseInt(args[1])); //args[1] is numeric = k
- doFreq(getBR(null), sketch); //user k, [System.in]
- break;
- case 5: //3 valid args
- sketch = new FrequentItemsSketch<String>(Integer.parseInt(args[1])); //args[1] is numeric = k
- doFreq(getBR(args[2]), sketch);
- }
- }
-
- private static void doFreq(BufferedReader br, FrequentItemsSketch<String> sketch) {
- String itemStr = "";
- try {
- while ((itemStr = br.readLine()) != null) {
- sketch.update(itemStr);
- }
- } catch (IOException e ) {
- printlnErr("Read Error: Item: "+itemStr +", "+br.toString());
- System.exit(1);
- }
- //NFP is a subset of NFN
- FrequentItemsSketch<String>.Row[] rowArr = sketch.getFrequentItems(ErrorType.NO_FALSE_POSITIVES);
- int len = rowArr.length;
- println("Qualifying Rows: "+len);
- println(Row.getRowHeader());
- for (int i=0; i<len; i++) {
- println((i+1) + rowArr[i].toString());
- }
- }
-
- private static double[] getEvenSplits(QuantilesSketch sketch, int splitPoints) {
- double min = sketch.getMinValue();
- double max = sketch.getMaxValue();
- return getSplits(min, max, splitPoints);
- }
-
- private static double[] getLogSplits(QuantilesSketch sketch, int splitPoints) {
- double min = sketch.getMinValue();
- double max = sketch.getMaxValue();
- double logMin = log10(min);
- double logMax = log10(max);
- double[] logArr = getSplits(logMin, logMax, splitPoints);
- double[] expArr = new double[logArr.length];
- for (int i= 0; i<logArr.length; i++) {
- expArr[i] = pow(10.0, logArr[i]);
- }
- return expArr;
- }
-
- private static double[] getSplits(double min, double max, int splitPoints) {
- double range = max - min;
- double delta = range/(splitPoints + 1);
- double[] splits = new double[splitPoints];
- for (int i = 0; i < splitPoints; i++) {
- splits[i] = delta * (i+1);
- }
- return splits;
- }
-
- private static boolean isNumeric(String token) {
- for (char c : token.toCharArray()) {
- if (!Character.isDigit(c)) return false;
- }
- return true;
- }
-
- private static BufferedReader getBR(String token) {
- BufferedReader br = null;
- try {
- if ((token == null) || (token.length() == 0)) {
- br = new BufferedReader(new InputStreamReader(System.in));
- } else {
- br = new BufferedReader(new InputStreamReader(new FileInputStream(token)));
- }
- } catch (FileNotFoundException e) {
- printlnErr("File Not Found: "+token);
- System.exit(1);
- }
- return br;
- }
-
- private static void uniqHelp() {
- StringBuilder sb = new StringBuilder();
- sb.append(BOLD+"UNIQ SYNOPSIS"+OFF).append(LS);
- sb.append(" sketch uniq help").append(LS);
- sb.append(" sketch uniq [SIZE] [FILE]");
- println(sb.toString());
- }
-
- private static void rankHelp() {
- StringBuilder sb = new StringBuilder();
- sb.append(BOLD+"RANK SYNOPSIS"+OFF).append(LS);
- sb.append(" sketch rank help").append(LS);
- sb.append(" sketch rank [SIZE] [FILE]");
- println(sb.toString());
- }
-
- private static void histHelp() {
- StringBuilder sb = new StringBuilder();
- sb.append(BOLD+"HIST SYNOPSIS"+OFF).append(LS);
- sb.append(" sketch hist help").append(LS);
- sb.append(" sketch hist [SIZE] [FILE]");
- println(sb.toString());
- }
-
- private static void logHistHelp() {
- StringBuilder sb = new StringBuilder();
- sb.append(BOLD+"LOGHIST SYNOPSIS"+OFF).append(LS);
- sb.append(" sketch loghist help").append(LS);
- sb.append(" sketch loghist [SIZE] [FILE]");
- println(sb.toString());
- }
-
- private static void freqHelp() {
- StringBuilder sb = new StringBuilder();
- sb.append(BOLD+"FREQ SYNOPSIS"+OFF).append(LS);
- sb.append(" sketch freq help").append(LS);
- sb.append(" sketch freq [SIZE] [FILE]");
- println(sb.toString());
- }
-
- static void help() {
- StringBuilder sb = new StringBuilder();
- sb.append(BOLD+"NAME"+OFF).append(LS);
- sb.append(" sketch - sketch Uniques, Quantiles, Histograms, or Frequent Items.").append(LS);
- sb.append(BOLD+"SYNOPSIS"+OFF).append(LS);
- sb.append(" sketch (this help)").append(LS);
- sb.append(" sketch TYPE help").append(LS);
- sb.append(" sketch TYPE [SIZE] [FILE]").append(LS);
- sb.append(BOLD+"DESCRIPTION"+OFF).append(LS);
- sb.append(" Write a sketch(TYPE, SIZE) of FILE to standard output.").append(LS);
- sb.append(" TYPE is required.").append(LS);
- sb.append(" If SIZE is omitted, internal defaults are used.").append(LS);
- sb.append(" If FILE is omitted, Standard In is assumed.").append(LS);
- sb.append(BOLD+"TYPE DESCRIPTION"+OFF).append(LS);
- sb.append(" sketch uniq : Sketch the unique string items of a stream.").append(LS);
- sb.append(" sketch rank : Sketch the rank-value distribution of a numeric value stream.").
- append(LS);
- sb.append(" sketch hist : "+
- "Sketch the linear-axis value-frequency distribution of numeric value stream.").append(LS);
- sb.append(" sketch loghist : "+
- "Sketch the log-axis value-frequency distribution of numeric value stream.").append(LS);
- sb.append(" sketch freq : Sketch the Heavy Hitters of a string item stream.");
- println(sb.toString());
- uniqHelp();
- rankHelp();
- histHelp();
- logHistHelp();
- freqHelp();
- }
-
- private static void printlnErr(String s) { System.err.println(s); }
-
- private static void println(String s) { System.out. println(s); }
-}
diff --git a/src/test/java/com/yahoo/sketches/cmd/CommandLineTest.java b/src/test/java/com/yahoo/sketches/cmd/CommandLineTest.java
deleted file mode 100644
index 344e88d..0000000
--- a/src/test/java/com/yahoo/sketches/cmd/CommandLineTest.java
+++ /dev/null
@@ -1,24 +0,0 @@
-/*
- * Copyright 2016, Yahoo! Inc.
- * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
- */
-package com.yahoo.sketches.cmd;
-
-//import static org.testng.Assert.*;
-
-//import static com.yahoo.sketches.CommandLine.*;
-import org.testng.annotations.Test;
-
-public class CommandLineTest {
-
- @Test
- public void checkHelp() {
- //CommandLine.help();
- }
-
- @Test
- public void checkUniq() {
- //CommandLine.
- }
-
-}
diff --git a/src/test/java/com/yahoo/sketches/demo/DemoImpl.java b/src/test/java/com/yahoo/sketches/demo/DemoImpl.java
deleted file mode 100644
index f9c16cc..0000000
--- a/src/test/java/com/yahoo/sketches/demo/DemoImpl.java
+++ /dev/null
@@ -1,382 +0,0 @@
-/*
- * Copyright 2015, Yahoo! Inc.
- * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
- */
-package com.yahoo.sketches.demo;
-
-import static java.lang.Math.sqrt;
-import static com.yahoo.sketches.hash.MurmurHash3.hash;
-
-import com.yahoo.sketches.Family;
-import com.yahoo.sketches.ResizeFactor;
-import com.yahoo.sketches.hll.HllSketch;
-import com.yahoo.sketches.theta.Sketches;
-import com.yahoo.sketches.theta.UpdateSketch;
-
-import java.io.BufferedReader;
-import java.io.File;
-import java.io.IOException;
-import java.io.InputStreamReader;
-import java.nio.ByteBuffer;
-import java.nio.channels.SeekableByteChannel;
-import java.nio.file.Files;
-import java.nio.file.Path;
-import java.nio.file.Paths;
-import java.nio.file.StandardOpenOption;
-import java.util.Random;
-
-/**
- * A simple demo that compares brute force counting of uniques vs. using sketches.
- *
- * <p>This demo computes a stream of values and feeds them first to
- * an exact sort-based method of computing the number of unique values
- * in the stream and then feeds a similar stream to two different types of
- * sketches from the library.
- *
- * <p>This demo becomes most significant in the case where the number of uniques in the
- * stream exceeds what the computer can hold in memory.
- *
- * <p>This demo utilizes the Unix sort and wc commands for the brute force compuation.
- * So this needs to be run on a linux or mac machine. A windows machine with a similar unix
- * library installed should also work, but it has not been tested.
- */
-public class DemoImpl {
- //Static constants
- private static final String LS = System.getProperty("line.separator");
- private static final byte LS_BYTE = LS.getBytes()[0];
- private static Random rand = new Random();
- private static StandardOpenOption C = StandardOpenOption.CREATE;
- private static StandardOpenOption W = StandardOpenOption.WRITE;
- private static StandardOpenOption TE = StandardOpenOption.TRUNCATE_EXISTING;
-
- //Stream Configuration
- private int byteBufCap_ = 1000000; //ByteBuffer capacity
- private long n_ = (long)1E8; //stream length
- private final int threshold_; //equivalent uniquesFraction on integer scale
-
- //Sketch configuration
- private int lgK_ = 14; //16K
-
- //Internal sketch values
- private int maxMemSkBytes_;
- private double rse2_; //RSE for 95% confidence
- private UpdateSketch tSketch_ = null;
- private HllSketch hllSketch_ = null;
-
- //Other internal values
- private Path path = Paths.get("tmp/test.txt");
- private long[] vArr_ = new long[1]; //reuse this array
- private long fileBytes_ = 0;
- private long u_ = 0; //unique count;
-
- /**
- * Constuct the demo.
- * @param streamLen The total stream length.
- * @param uniquesFraction the fraction of streamLen values less than 1.0, that will be unique.
- * The actual # of uniques will vary around this value, because it is computed statistically.
- */
- public DemoImpl(long streamLen, double uniquesFraction) {
- if (uniquesFraction == 1.0) {
- this.threshold_ = Integer.MAX_VALUE;
- }
- else {
- this.threshold_ = (int)(Integer.MAX_VALUE * uniquesFraction);
- }
- n_ = streamLen;
- lgK_ = 14; //Log-base 2 of the configured sketch size = 16K
- File dir = new File("tmp");
- if (!dir.exists()) {
- try {
- dir.mkdir();
- } catch(SecurityException e) {
- throw new SecurityException(e);
- }
- }
- }
-
- /**
- * Run the demo
- */
- public void runDemo() {
- println("# COMPUTE DISTINCT COUNT EXACTLY:");
- long exactTimeMS;
-
- exactTimeMS = buildFile();
- //exactTimeMS = buildFileAndSketch(); //used instead only for testing
-
- println("## SORT & REMOVE DUPLICATES");
- String sortCmd = "sort -u -o tmp/sorted.txt tmp/test.txt";
- exactTimeMS += runUnixCmd("sort", sortCmd);
-
- println("\n## LINE COUNT");
- String wcCmd = "wc -l tmp/sorted.txt";
- exactTimeMS += runUnixCmd("wc", wcCmd);
-
- println("Total Exact "+getMinSec(exactTimeMS) +LS+LS);
-
- println("# COMPUTE DISTINCT COUNT USING SKETCHES");
- configureThetaSketch();
- long sketchTimeMS = buildSketch();
- double factor = exactTimeMS*1.0/sketchTimeMS;
- println("Speedup Factor "+String.format("%.1f", factor) + LS);
-
- configureHLLSketch();
- sketchTimeMS = buildSketch();
- factor = exactTimeMS*1.0/sketchTimeMS;
- println("Speedup Factor "+String.format("%.1f", factor));
-
- }
-
- /**
- * @return total test time in milliseconds
- */
- private long buildFile() {
- println("## BUILD FILE:");
- ByteBuffer byteBuf = ByteBuffer.allocate(byteBufCap_);
- u_ = 0;
- fileBytes_ = 0;
- long testStartTime_mS = System.currentTimeMillis();
- try (SeekableByteChannel sbc = Files.newByteChannel(path, C, W, TE)) {
- for (long i=0; i<n_; i++) {
- long v = nextValue();
- String s = Long.toHexString(v);
- if (byteBuf.remaining() < 25) {
- byteBuf.flip();
- fileBytes_ += sbc.write(byteBuf);
- byteBuf.clear();
- }
- byteBuf.put(s.getBytes()).put(LS_BYTE);
- }
- if (byteBuf.position() > 0) { //write remainder
- byteBuf.flip();
- fileBytes_ += sbc.write(byteBuf);
- byteBuf.clear();
- }
- }
- catch (IOException e) {
- e.printStackTrace();
- }
- long testTime_mS = System.currentTimeMillis() - testStartTime_mS;
- //Print common results
- printCommon(testTime_mS, n_, u_);
- //Print file results
- println("File Size Bytes: "+String.format("%,d", fileBytes_) + LS);
- return testTime_mS;
- }
-
- /**
- * @return total test time in milliseconds
- */
- private static long runUnixCmd(String name, String cmd) {
- StringBuilder sbOut = new StringBuilder();
- StringBuilder sbErr = new StringBuilder();
- String out = null;
- String err = null;
- Process p = null;
- String[] envp = {"LC_ALL=C"}; //https://bugs.launchpad.net/ubuntu/+source/coreutils/+bug/846628
- long testStartTime_mS = System.currentTimeMillis();
- try {
- // run the Unix cmd using the Runtime exec method:
- p = Runtime.getRuntime().exec(cmd, envp);
- BufferedReader stdInput = new BufferedReader(new InputStreamReader(p.getInputStream()));
- BufferedReader stdError = new BufferedReader(new InputStreamReader(p.getErrorStream()));
-
- // read the output from the command
- boolean outFlag = true;
- while ((out = stdInput.readLine()) != null) {
- if (outFlag) {
- sbOut.append("Output from "+name+" command:").append(LS);
- outFlag = false;
- }
- sbOut.append(out).append(LS);
- }
-
- // read any errors from the attempted command
- boolean errFlag = true;
- while ((err = stdError.readLine()) != null) {
- if (errFlag) {
- sbErr.append("\nError from "+name+" command:").append(LS);
- errFlag = false;
- }
- sbErr.append(err).append(LS);
- }
- }
- catch (IOException e) {
- System.out.println("Exception: ");
- e.printStackTrace();
- System.exit( -1);
- }
- if ((p != null) && (p.isAlive())) {
- p.destroy();
- }
- long testTime_mS = System.currentTimeMillis() - testStartTime_mS;
- println("Unix cmd: "+cmd);
- println(getMinSec(testTime_mS));
- if (sbOut.length() > 0) { println(sbOut.toString()); }
- if (sbErr.length() > 0) { println(sbErr.toString()); }
- return testTime_mS;
- }
-
- /**
- * @return total test time in milliseconds
- */
- private long buildSketch() {
- u_ = 0; //unique counter for accuracy computation
- long testStartTime_mS = System.currentTimeMillis();
-
- if (tSketch_ != null) { //Theta Sketch
- for (long i = 0; i < n_; i++) {
- long v = nextValue();
- tSketch_.update(v);
- }
- }
- else { //HLL Sketch
- for (long i = 0; i < n_; i++) {
- long v = nextValue();
- hllSketch_.update(v);
- }
- }
- long testTime_mS = System.currentTimeMillis() - testStartTime_mS;
-
- //Print sketch name
- String sk = (tSketch_ != null)? "THETA" : "HLL";
- println("## USING "+sk+" SKETCH");
- //Print common results
- printCommon(testTime_mS, n_, u_);
-
- //Print sketch results
- printSketchResults(u_, maxMemSkBytes_, rse2_);
- return testTime_mS;
- }
-
- /**
- * Used in testing
- * @return total test time in milliseconds
- */
- @SuppressWarnings("unused")
- private long buildFileAndSketch() {
- println("## BUILD FILE AND SKETCH:");
- ByteBuffer byteBuf = ByteBuffer.allocate(byteBufCap_);
- u_ = 0;
- fileBytes_ = 0;
- long testStartTime_mS = System.currentTimeMillis();
- try (SeekableByteChannel sbc = Files.newByteChannel(path, C, W, TE)) {
- if (tSketch_ != null) {
- long v = nextValue();
- tSketch_.update(v);
-
- //build file
- String s = Long.toHexString(v);
- if (byteBuf.remaining() < 25) {
- byteBuf.flip();
- fileBytes_ += sbc.write(byteBuf);
- byteBuf.clear();
- }
- byteBuf.put(s.getBytes()).put(LS_BYTE);
- }
- else { //HLL Sketch
- long v = nextValue();
- hllSketch_.update(v);
-
- //build file
- String s = Long.toHexString(v);
- if (byteBuf.remaining() < 25) {
- byteBuf.flip();
- fileBytes_ += sbc.write(byteBuf);
- byteBuf.clear();
- }
- byteBuf.put(s.getBytes()).put(LS_BYTE);
- }
-
- if (byteBuf.position() > 0) {
- byteBuf.flip();
- fileBytes_ += sbc.write(byteBuf);
- byteBuf.clear();
- }
- }
- catch (IOException e) {
- e.printStackTrace();
- }
- long testTime_mS = System.currentTimeMillis() - testStartTime_mS;
-
- //Print common results
- printCommon(testTime_mS, n_, u_);
- //Print file results
- println("File Size Bytes: "+String.format("%,d", fileBytes_));
-
- //Print sketch results
- printSketchResults(u_, maxMemSkBytes_, rse2_);
- return testTime_mS;
- }
-
- /**
- * @return next hashed long value
- */
- private long nextValue() {
- if (((rand.nextInt() >>> 1) < threshold_) || (u_ == 0)) {
- u_++;
- }
- vArr_[0] = u_;
- return hash(vArr_, 0L)[0];
- }
-
-// private long nextValue() { //Faster version, always 100% uniques
-// vArr_[0] = ++u_;
-// return hash(vArr_, 0L)[0];
-// }
-
- private final void configureThetaSketch() {
- int k = 1 << lgK_; //14
- hllSketch_ = null;
- maxMemSkBytes_ = k *16; //includs full hash table
- rse2_ = 2.0/sqrt(k); //Error for 95% confidence
- tSketch_ = Sketches.updateSketchBuilder().
- setResizeFactor(ResizeFactor.X1).
- setFamily(Family.ALPHA).build(k );
- }
-
- private final void configureHLLSketch() {
- int k = 1 << lgK_; //14
- boolean compressed = true;
- boolean hipEstimator = true;
- boolean denseMode = true;
- tSketch_ = null;
- maxMemSkBytes_ = (compressed)? k/2 : k;
- rse2_ = 2.0 * ((hipEstimator)? 0.836/sqrt(k) : 1.04/sqrt(k)); //for 95% confidence
- hllSketch_ = HllSketch.builder().setLogBuckets(lgK_).
- setHipEstimator(hipEstimator).
- setDenseMode(denseMode).
- setCompressedDense(compressed).
- build();
- }
-
- private static void printCommon(long testTime, long n, long u) {
- println(getMinSec(testTime));
- println("Total Values: "+String.format("%,d",n));
- int nSecRate = (int) (testTime *1000000.0/n);
- println("Build Rate: "+ String.format("%d nSec/Value", nSecRate));
- println("Exact Uniques: "+String.format("%,d", u));
- }
-
- private void printSketchResults(long u, int maxMemSkBytes, double rse2) {
- String sk = (tSketch_ != null)? "THETA" : "HLL";
- println("## USING "+sk+" SKETCH");
- double rounded = Math.round((tSketch_ != null)? tSketch_.getEstimate() : hllSketch_.getEstimate());
- println("Sketch Estimate of Uniques: "+ String.format("%,d", (long)rounded));
- double err = (u == 0)? 0 : (rounded/u - 1.0);
- println("Sketch Relative Error: "+String.format("%.3f%%, +/- %.3f%%", err*100, rse2*100));
- println("Max Sketch Size Bytes: "+ String.format("%,d", maxMemSkBytes));
- }
-
- private static String getMinSec(long mSec) {
- int totSec = (int)(mSec/1000.0);
- int min = totSec/60;
- int sec = totSec%60;
- int ms = (int)(mSec - totSec * 1000);
- String t = String.format("Time Min:Sec.mSec = %d:%02d.%03d", min, sec, ms);
- return t;
- }
-
- private static void println(String s) { System.out.println(s); }
-
-}
diff --git a/src/test/java/com/yahoo/sketches/demo/ExactVsSketchDemo.java b/src/test/java/com/yahoo/sketches/demo/ExactVsSketchDemo.java
deleted file mode 100644
index cbc6640..0000000
--- a/src/test/java/com/yahoo/sketches/demo/ExactVsSketchDemo.java
+++ /dev/null
@@ -1,68 +0,0 @@
-/*
- * Copyright 2015, Yahoo! Inc.
- * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
- */
-
-package com.yahoo.sketches.demo;
-
-import com.yahoo.sketches.demo.DemoImpl;
-
-/**
- * <p>This demo computes a stream of values and feeds them first to
- * an exact sort-based method of computing the number of unique values
- * in the stream and then feeds a similar stream to two different types of
- * sketches from the library.
- *
- * <p>This demo becomes most significant in the case where the number of uniques in the
- * stream exceeds what the computer can hold in memory.
- *
- * <p>This demo utilizes the Unix/Linux/OS-X sort and wc commands for the brute force compuation.
- * So this needs to be run on a linux or mac machine. A windows machine with a suitable unix
- * library installed might also work, but it has not been tested.
- *
- * <p>To run this demo from the command line:</p>
- * <ul><li>Clone the lastest snapshot from https://github.com/DataSketches/sketches-core.</li>
- * <li>Change to the directory where you did the clone</li>
- * <li>Do a Maven Install: "mvn install"</li>
- * <li>In the following commands replace X.Y.Z with the actual jar version from the target
- * directory:<br>
- * javac -cp target/sketches-core-X.Y.Z.jar src/test/java/com/yahoo/sketches/demo/*.java<br>
- * java -cp target/sketches-core-X.Y.Z.jar:src/test/java com.yahoo.sketches.demo.ExactVsSketchDemo
- * 1E6</li>
- * <li>The demo will output results to the console. You can change the 1E6 (1 million) to even
- * larger values (e.g., 1E8) but be patient. The exact sort can take a long, long time!</li>
- * </ul>
- *
- */
-public class ExactVsSketchDemo {
-
- /**
- * Runs the demo.
- *
- * @param args
- * <ul><li>arg[0]: (Optional) The stream length and can be expressed as a positive double value.
- * The default is 1E6.</li>
- * <li>arg[1] (Optional) The fraction of the stream length that will be unique, the remainder
- * will be duplicates. The default is 1.0. Note that if this argument is less than 1.0,
- * the actual number of exact uniques is statistically determined for each trial and then
- * separately counted. That is, the number of exact uniques for the "sort" trial
- * will be different from the exact uniques for each of the sketch trial. </li>
- * </ul>
- */
- public static void main(String[] args) {
- int argsLen = args.length;
- long streamLen = (long)1E8; //The default stream length
- double uFrac = 1.0; //The default fraction that are unique
- if (argsLen == 1) {
- streamLen = (long)(Double.parseDouble(args[0]));
- } else if (argsLen > 1) {
- streamLen = (long)(Double.parseDouble(args[0]));
- uFrac = Double.parseDouble(args[1]);
- }
-
- DemoImpl demo = new DemoImpl(streamLen, uFrac);
-
- demo.runDemo();
- }
-
-}
diff --git a/src/test/java/com/yahoo/sketches/examples/ExamplesTest.java b/src/test/java/com/yahoo/sketches/examples/ExamplesTest.java
deleted file mode 100644
index d95e77a..0000000
--- a/src/test/java/com/yahoo/sketches/examples/ExamplesTest.java
+++ /dev/null
@@ -1,92 +0,0 @@
-/*
- * Copyright 2015, Yahoo! Inc.
- * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
- */
-package com.yahoo.sketches.examples;
-
-import org.testng.annotations.Test;
-
-import com.yahoo.sketches.BinomialBoundsN;
-import com.yahoo.sketches.theta.AnotB;
-import com.yahoo.sketches.theta.CompactSketch;
-import com.yahoo.sketches.theta.Intersection;
-import com.yahoo.sketches.theta.Sketches;
-import com.yahoo.sketches.theta.Union;
-import com.yahoo.sketches.theta.UpdateSketch;
-
-public class ExamplesTest {
-
- @Test
- public void setOpsExample() {
- println("Set Operations Example:");
- int k = 4096;
- UpdateSketch skA = Sketches.updateSketchBuilder().build(k);
- UpdateSketch skB = Sketches.updateSketchBuilder().build(k);
- UpdateSketch skC = Sketches.updateSketchBuilder().build(k);
-
- for (int i=1; i<=10; i++) { skA.update(i); }
- for (int i=1; i<=20; i++) { skB.update(i); }
- for (int i=6; i<=15; i++) { skC.update(i); } //overlapping set
-
- Union union = Sketches.setOperationBuilder().buildUnion(k);
- union.update(skA);
- union.update(skB);
- // ... continue to iterate on the input sketches to union
-
- CompactSketch unionSk = union.getResult(); //the result union sketch
- println("A U B : "+unionSk.getEstimate()); //the estimate of the union
-
- //Intersection is similar
-
- Intersection inter = Sketches.setOperationBuilder().buildIntersection();
- inter.update(unionSk);
- inter.update(skC);
- // ... continue to iterate on the input sketches to intersect
-
- CompactSketch interSk = inter.getResult(); //the result intersection sketch
- println("(A U B) ^ C: "+interSk.getEstimate()); //the estimate of the intersection
-
- //The AnotB operation is a little different as it is stateless:
-
- AnotB aNotB = Sketches.setOperationBuilder().buildANotB();
- aNotB.update(skA, skC);
-
- CompactSketch not = aNotB.getResult();
- println("A \\ C : "+not.getEstimate()); //the estimate of the AnotB operation
- }
-
- @Test
- public void boundsExample() {
- println("BinomialBoundsN Example:");
- int k = 500;
- double theta = 0.001;
- int stdDev = 2;
- double ub = BinomialBoundsN.getUpperBound(k, theta, stdDev, false);
- double est = k/theta;
- double lb = BinomialBoundsN.getLowerBound(k, theta, stdDev, false);
- println("K="+k+", Theta="+theta+", SD="+stdDev);
- println("UB: "+ub);
- println("Est: "+est);
- println("LB: "+lb);
- println("");
- }
-
- @Test
- public void printlnTest() {
- println("PRINTING: "+this.getClass().getName());
- }
-
- /**
- * @param s value to print
- */
- static void println(String s) {
- //System.out.println(s); //disable here
- }
-
- public static void main(String[] args) {
- ExamplesTest ext = new ExamplesTest();
- ext.setOpsExample();
- ext.boundsExample();
-
- }
-}
diff --git a/src/test/java/com/yahoo/sketches/performance/ProcessStats.java b/src/test/java/com/yahoo/sketches/performance/ProcessStats.java
deleted file mode 100644
index dfe8160..0000000
--- a/src/test/java/com/yahoo/sketches/performance/ProcessStats.java
+++ /dev/null
@@ -1,188 +0,0 @@
-/*
- * Copyright 2015, Yahoo! Inc.
- * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
- */
-package com.yahoo.sketches.performance;
-
-import static java.lang.Math.abs;
-import static java.lang.Math.sqrt;
-
-import java.util.Arrays;
-
-/**
- * Processes the statistics collected from an array of Stats objects from a trial set
- * and creates an output row
- *
- * @author Lee Rhodes
- */
-public class ProcessStats {
- private static final char TAB = '\t';
- //Quantile fractions computed from the standard normal cumulative distribution.
- private static final double M2SD = 0.022750131948179; //minus 2 StdDev
- private static final double M1SD = 0.158655253931457; //minus 1 StdDev
- private static final double P1SD = 0.841344746068543; //plus 1 StdDev
- private static final double P2SD = 0.977249868051821; //plus 2 StdDev
-
- /**
- * Process the Stats[] array and place the output row into the dataStr.
- * @param statsArr the input Stats array
- * @param uPerTrial the number of uniques per trial for this trial set.
- * @param lgK log base 2 of configured nominal entries, or k.
- * @param p the probability sampling rate. 0 < p ≤ 1.0.
- * @param dataStr The StringBuilder object that is reused for each row of output
- */
- public static void process(Stats[] statsArr, int uPerTrial, int lgK, double p, StringBuilder dataStr) {
- int k = 1 << lgK;
- int trials = statsArr.length;
- Arrays.sort(statsArr, 0, trials);
-
- //Computing the quantiles from the sorted array.
- double min = statsArr[0].re;
- double qM2SD = statsArr[quantileIndex(M2SD,trials)].re;
- double qM1SD = statsArr[quantileIndex(M1SD,trials)].re;
- double q50 = statsArr[quantileIndex(.5,trials)].re;
- double qP1SD = statsArr[quantileIndex(P1SD,trials)].re;
- double qP2SD = statsArr[quantileIndex(P2SD,trials)].re;
- double max = statsArr[trials-1].re;
-
- int cntLB2 = 0, cntLB1 = 0, cntUB1 = 0, cntUB2 = 0;
-// double sumLB2 = 0, sumLB1 = 0, sumUB1 = 0, sumUB2 = 0;
- double sumEst = 0, sumEstErr = 0, sumSqEstErr = 0;
- double sumUpdateTimePerU_nS = 0;
- //Scan the sorted statsArr
- for (int i=0; i<trials; i++) {
- Stats stats = statsArr[i];
- if (uPerTrial > stats.ub2est) cntUB2++; //should be < 2.275%; under estimate
- if (uPerTrial > stats.ub1est) cntUB1++; //should be < 15.866%; under estimate
- if (uPerTrial < stats.lb1est) cntLB1++; //should be < 15.866%; over estimate
- if (uPerTrial < stats.lb2est) cntLB2++; //should be < 2.275%; over estimate
-// sumLB2 += stats.lb2est;
-// sumLB1 += stats.lb1est;
-// sumUB1 += stats.ub1est;
-// sumUB2 += stats.ub2est;
- //divide by uPerTrial to normalize betweeen 0 and 1.0, sum over all trials
- //Components for the mean and variance of the estimate error
- sumEst += statsArr[i].estimate;
- double estErr = statsArr[i].re;
- sumEstErr += estErr;
- sumSqEstErr += estErr*estErr;
-
- sumUpdateTimePerU_nS += statsArr[i].updateTimePerU_nS;
- }
- //normalize counts
- double fracTgtUB2 = (double)cntUB2/trials;
- double fracTgtUB1 = (double)cntUB1/trials;
- double fracTltLB1 = (double)cntLB1/trials;
- double fracTltLB2 = (double)cntLB2/trials;
-
- //Compute the average results over the trial set
- double meanEst = sumEst/trials;
- double meanEstErr = sumEstErr/trials;
- double deltaSqEstErr = abs(sumSqEstErr - (sumEstErr*sumEstErr)/trials);
- double varEstErr = (trials == 1)? deltaSqEstErr/trials : deltaSqEstErr/(trials-1);
- double rse = sqrt(varEstErr);
- //compute theoretical sketch RSE
- double invKm1 = 1.0/(k-1);
- double oneMinusKoverN = 1.0 - (double)k/uPerTrial;
- double thrse = (sumEstErr == 0.0)? 0.0 : sqrt(invKm1 * oneMinusKoverN);
- //compute Bernoulli RSE
- double invUperTrial = 1.0/uPerTrial;
- double varOverN = (p == 1.0)? 0.0 : 1.0/p - 1.0;
- double prse = (p == 1.0)? 0.0 : sqrt(invUperTrial * varOverN);
-
- //Compute average of each of the bounds estimates
-// double meanLB2est = sumLB2/(uPerTrial*trials) -1;
-// double meanLB1est = sumLB1/(uPerTrial*trials) -1;
-// double meanUB1est = sumUB1/(uPerTrial*trials) -1;
-// double meanUB2est = sumUB2/(uPerTrial*trials) -1;
-
- //Speed
- double meanUpdateTimePerU_nS = sumUpdateTimePerU_nS/trials;
-
- //OUTPUT
- dataStr.setLength(0);
- dataStr.append(uPerTrial).append(TAB).
-
- //Sketch estimates, mean, variance
- append(meanEst).append(TAB).
- append(meanEstErr).append(TAB).
- append(rse).append(TAB).
- append(thrse).append(TAB).
- append(prse).append(TAB).
-
- //Quantiles measured from the actual distribution of values from all trials.
- //Because of quantization effects these values will be noisier than the values
- //computed statistically above.
- append(min).append(TAB).
- append(qM2SD).append(TAB).
- append(qM1SD).append(TAB).
- append(q50).append(TAB).
- append(qP1SD).append(TAB).
- append(qP2SD).append(TAB).
- append(max).append(TAB).
-
- //Fractional Bounds measurements
- append(fracTltLB2).append(TAB).
- append(fracTltLB1).append(TAB).
- append(fracTgtUB1).append(TAB).
- append(fracTgtUB2).append(TAB).
-
- //The bounds estimates are computed mathematically based on the sketch
- // estimate, the number of valid values in the cache and the value of theta.
- // Because of this thes values will be relatively smooth from point to point along the
- // unique value axis.
-// append(meanLB2est).append(TAB).
-// append(meanLB1est).append(TAB).
-// append(meanUB1est).append(TAB).
-// append(meanUB2est).append(TAB).
- //Trials
- append(trials).append(TAB).
- //Speed
- append(meanUpdateTimePerU_nS);
- }
-
- /**
- * Returns a column header row
- * @return a column header row
- */
- public static String getHeader() {
- StringBuilder sb = new StringBuilder();
- sb. append("InU").append(TAB).
- //Estimates
- append("MeanEst").append(TAB).
- append("MeanErr").append(TAB).
- append("RSE").append(TAB).
- append("thRSE").append(TAB).
- append("pRSE").append(TAB).
- //Quantiles
- append("Min").append(TAB).
- append("QM2SD").append(TAB).
- append("QM1SD").append(TAB).
- append("Q50").append(TAB).
- append("QP1SD").append(TAB).
- append("QP2SD").append(TAB).
- append("Max").append(TAB).
- //Fractional Bounds measurements
- append("FracTltLB2").append(TAB).
- append("FracTltLB1").append(TAB).
- append("FracTgtUB1").append(TAB).
- append("FracTgtUB2").append(TAB).
-
- //Trials
- append("Trials").append(TAB).
- //Speed
- append("nS/u");
- return sb.toString();
- }
-
- /**
- * Returns the trial index = floor(quantile-fraction, #trials)
- * @param frac the desired quantile fraction (0.0 - 1.0)
- * @param trials the number of total trials
- * @return the trial index
- */
- private static int quantileIndex(double frac, int trials) {
- int idx1 = (int) Math.floor(frac*trials);
- return (idx1 >= trials)? trials-1: idx1;
- }
-}
diff --git a/src/test/java/com/yahoo/sketches/performance/SketchPerformance.java b/src/test/java/com/yahoo/sketches/performance/SketchPerformance.java
deleted file mode 100644
index a722804..0000000
--- a/src/test/java/com/yahoo/sketches/performance/SketchPerformance.java
+++ /dev/null
@@ -1,144 +0,0 @@
-/*
- * Copyright 2015, Yahoo! Inc.
- * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
- */
-package com.yahoo.sketches.performance;
-
-import static java.lang.Math.floor;
-import static java.lang.Math.pow;
-
-import com.yahoo.sketches.Family;
-import com.yahoo.sketches.ResizeFactor;
-import com.yahoo.sketches.hll.HllSketch;
-import com.yahoo.sketches.hll.HllSketchBuilder;
-import com.yahoo.sketches.theta.UpdateSketch;
-import com.yahoo.sketches.theta.UpdateSketchBuilder;
-
-/**
- * Used to generate data for plotting the error distribution or speed performance of a sketch.
- * The X-axis is assumed to be the number of uniques fed to the sketch and varies from 1 to whatever
- * is specified in the lgMaxU parameter. "lg" is shorthand for Log_base_2, so if lgMaxU is 12 then
- * the highest number of uniques on the X-axis would be 4096. An exponential series is used for the
- * unique values per trial so that a wide range of unique values (over many octaves) can be tested
- * using a constant number of points per octave. This dramatically reduces the number of plotting
- * points required and produces nice plots when plotted against a log axis.
- *
- * <p>See the main() method as an example of how to configure.
- *
- * @author Lee Rhodes
- */
-public class SketchPerformance {
-
- /**
- * This method drives the whole process. An exponential series is used for the unique
- * counts per trial so that a wide range of unique values (over many octaves) can be tested using
- * a constant number of points per octave. This dramatically reduces the number of plotting points
- * required and produces nice plots when plotted against a log axis. See the main() method as an
- * example of how to configure this.
- *
- * @param trialMgr TrialManager to be used
- */
- public static void start(TrialManager trialMgr) {
- long testStartTime_mS = System.currentTimeMillis();
- int lastGI = trialMgr.getMaximumGeneratingIndex();
- int ppo = trialMgr.getPPO();
- int lastU = 0;
- println(ProcessStats.getHeader());
- StringBuilder dataStr = new StringBuilder();
-
- //Each generating index (gi) will generate a new row of data
- // representing N trials at a specific number of unique values.
- for (int gi = 0; gi <= lastGI; gi++) {
- int u = (int)floor(pow(2.0, (double)gi/ppo));
- if (u == lastU) continue; //at the low end skips over duplicate values of u
- lastU = u;
- int trials = trialMgr.getTrials(u);
- int lgK = trialMgr.getLgK();
- double p = trialMgr.getP();
- Stats[] statsArr = processTrialSet(trialMgr, u, trials);
- ProcessStats.process(statsArr, u, lgK, p, dataStr);
- println(dataStr.toString());
- }
- int testTime_S = (int)((System.currentTimeMillis() - testStartTime_mS)/1000.0);
- int min = testTime_S/60;
- int sec = testTime_S%60;
- println("TestTime: "+min+":"+sec);
- }
-
- /**
- * A Trial Set is a number of trials at number of uniques per trial, uPerTrial.
- * This is set up so that the number of trials may vary based on the number of uniques for the
- * trial set.
- * @param trialMgr manages the sketch and updating of a stats object
- * @param uPerTrial uniques for every trial of a trial set
- * @param trials number of trials per trial set
- * @return the Stats array contains measurements for each trial of the trial set
- */
- private static Stats[] processTrialSet(TrialManager trialMgr, int uPerTrial, int trials) {
- Stats[] statsArr = new Stats[trials];
- System.gc();
- for (int t=0; t < trials; t++) {
- if (statsArr[t] == null) statsArr[t] = new Stats();
- trialMgr.doTrial(statsArr[t], uPerTrial);
- }
- return statsArr;
- }
-
- private static void println(String s) { System.out.println(s); }
-
-
- /**
- * This main method sets the configuration of the sketches, the TrialManager profile, and
- * runs the test.
- * @param args not used.
- */
- public static void main(String[] args) {
- //Common parameters
- int lgK = 12; //4K
- boolean udSketch = true; //set true if you want to use a theta UpdateSketch, false for HLL
-
- //Theta UpdateSketch parameters
- Family family = Family.QUICKSELECT;
- ResizeFactor rf = ResizeFactor.X1;// See javadocs.
- boolean direct = false; //See javadocs and the setSketchProfile code
- float p = 1.0F;
- boolean rebuild = false; //set true if rebuild is desired to reduce size down to k.
-
- //HLL Parameters
- boolean hip = true;
- boolean dense = false;
-
- //Trials Profile Parameters
- // For speed trials use min=4,5, max= 13,14,15,16
- // For accuracy trials use min=max= 10 or more
- int lgMinTrials = 4;
- int lgMaxTrials = 13;
- int lgMaxU = 20;
- int ppo = 16;
-
- //INITIALIZE
- TrialManager trialMgr = new TrialManager();
- trialMgr.setTrialsProfile(lgMinTrials, lgMaxTrials, lgMaxU, ppo);
- UpdateSketchBuilder udBldr = null;
- HllSketchBuilder hllBldr = null;
-
- if (udSketch) { //UpdateSketch Builder
- udBldr = UpdateSketch.builder().setNominalEntries(1 << lgK).setFamily(family).setP(p).
- setResizeFactor(rf);
- trialMgr.setUpdateSketchBuilder(udBldr, direct, rebuild);
- }
- else { //HLL Builder
- hllBldr = HllSketch.builder().setLogBuckets(lgK).setHipEstimator(hip).setDenseMode(dense);
- trialMgr.setHllSketchBuilder(hllBldr);
- }
-
- //START THE TESTS
- SketchPerformance.start(trialMgr);
-
- //PRINT SUMMARY
- if (udBldr != null) println(udBldr.toString());
- if (hllBldr != null) println(hllBldr.toString());
- println(trialMgr.toString());
- }
-
-}
diff --git a/src/test/java/com/yahoo/sketches/performance/Stats.java b/src/test/java/com/yahoo/sketches/performance/Stats.java
deleted file mode 100644
index 5cefe78..0000000
--- a/src/test/java/com/yahoo/sketches/performance/Stats.java
+++ /dev/null
@@ -1,60 +0,0 @@
-/*
- * Copyright 2015, Yahoo! Inc.
- * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
- */
-package com.yahoo.sketches.performance;
-
-import com.yahoo.sketches.hll.HllSketch;
-import com.yahoo.sketches.theta.UpdateSketch;
-
-/**
- * Holds key metrics from a single trial
- *
- * @author Lee Rhodes
- */
-public class Stats implements Comparable<Stats> {
- double estimate; //The estimate from the sketch
- double re = 0; //Relative Error. Will sort by this
- double lb2est; //LowerBound estimate at -2 StdDev
- double lb1est; //LowerBound estimate at -1 StdDev
- double ub1est; //UpperBound estimate at +1 StdDev
- double ub2est; //UpperBound estimate at +2 StdDev
- double updateTimePerU_nS;
-
- /**
- * Update this Stats with a theta UpdateSketch
- * @param sketch the sketch to update with
- * @param uPerTrial the number of uniques fed to the sketch in this trial
- * @param updateTime_nS the update time requred for all the updates in nanoSeconds.
- */
- public void update(UpdateSketch sketch, int uPerTrial, long updateTime_nS) {
- estimate = sketch.getEstimate();
- re = estimate/uPerTrial - 1.0;
- lb2est = sketch.getLowerBound(2);
- lb1est = sketch.getLowerBound(1);
- ub1est = sketch.getUpperBound(1);
- ub2est = sketch.getUpperBound(2);
- updateTimePerU_nS = (double)updateTime_nS / uPerTrial;
- }
-
- /**
- * Update this Stats with an HLL Sketch
- * @param sketch the sketch to update with
- * @param uPerTrial the number of uniques fed to the sketch in this trial
- * @param updateTime_nS the update time requred for all the updates in nanoSeconds.
- */
- public void update(HllSketch sketch, int uPerTrial, long updateTime_nS) {
- estimate = sketch.getEstimate();
- re = estimate/uPerTrial - 1.0;
- lb2est = sketch.getLowerBound(2);
- lb1est = sketch.getLowerBound(1);
- ub1est = sketch.getUpperBound(1);
- ub2est = sketch.getUpperBound(2);
- updateTimePerU_nS = (double)updateTime_nS / uPerTrial;
- }
-
- @Override
- public int compareTo(Stats that) {
- return (this.re < that.re)? -1 : (this.re > that.re)? 1 : 0;
- }
-}
diff --git a/src/test/java/com/yahoo/sketches/performance/TrialManager.java b/src/test/java/com/yahoo/sketches/performance/TrialManager.java
deleted file mode 100644
index 22f9714..0000000
--- a/src/test/java/com/yahoo/sketches/performance/TrialManager.java
+++ /dev/null
@@ -1,184 +0,0 @@
-/*
- * Copyright 2015, Yahoo! Inc.
- * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
- */
-package com.yahoo.sketches.performance;
-
-import static java.lang.Math.log;
-import static java.lang.Math.pow;
-
-import com.yahoo.sketches.hll.HllSketch;
-import com.yahoo.sketches.hll.HllSketchBuilder;
-import com.yahoo.sketches.memory.Memory;
-import com.yahoo.sketches.memory.NativeMemory;
-import com.yahoo.sketches.theta.Sketch;
-import com.yahoo.sketches.theta.UpdateSketch;
-import com.yahoo.sketches.theta.UpdateSketchBuilder;
-
-/**
- * Manages the execution of every trial. One of these for the entire process.
- *
- * @author Lee Rhodes
- */
-public class TrialManager {
- private static final double LN2 = log(2.0);
- private UpdateSketch udSketch_ = null;
- private HllSketchBuilder hllBuilder_ = null;
- private int lgK_;
- private double p_;
- //Global counter that increments for every new unique value.
- //Assures that all sketches are virtually independent.
- private long vIn_;
- private int lgBP_; //The break point
- private int lgMinTrials_;
- private int lgMaxTrials_;
- private int lgMaxU_;
- private int ppo_;
- private double slope_;
- private boolean rebuild_ = false;
-
- /**
- * Sets the theta UpdateSketch builder used to create the theta UpdateSketches.
- * @param udBldr the theta UpdateSketchBuilder
- * @param direct true if direct (off heap) mode is desired. Instead of actual off heap memory
- * this will emulate that behavior by using an on-heap byte array accessed by the Memory package.
- * Performance-wise it is the same except for issues of garbage collection, which is not the
- * purpose of this test.
- * @param rebuild set true if rebuild is desired
- */
- public void setUpdateSketchBuilder(UpdateSketchBuilder udBldr, boolean direct, boolean rebuild) {
- lgK_ = udBldr.getLgNominalEntries();
- p_ = udBldr.getP();
- int k = 1 << lgK_;
- lgBP_ = lgK_ + 1; //set the break point where the #trials starts to decrease.
- Memory mem = null;
- if (direct) {
- int bytes = Sketch.getMaxUpdateSketchBytes(k);
- byte[] memArr = new byte[bytes];
- mem = new NativeMemory(memArr);
- udBldr.initMemory(mem);
- }
- udSketch_ = udBldr.initMemory(mem).build(k);
- rebuild_ = rebuild;
- }
-
- /**
- * Sets the HLL builder used to create the HLL sketches.
- * @param hllBldr the HllSketchBuilder
- */
- public void setHllSketchBuilder(HllSketchBuilder hllBldr) {
- lgK_ = hllBldr.getLogBuckets();
- p_ = 1.0;
- udSketch_ = null;
- hllBuilder_ = hllBldr;
- }
-
- /**
- * This sets the profile for how the number of trials vary with the number of uniques.
- * The number of trials is the maximum until the number of uniques exceeds k, whereby
- * the number of trials starts to decrease in a power-law fashion until the minimum
- * number of trials is reached at the maximum number of uniques to be tested.
- * @param lgMinTrials The minimum number of trials in a trial set specified as the
- * exponent of 2. This will occur at the maximum uniques value.
- * @param lgMaxTrials The maximum number of trials in a trial set specified as the
- * exponent of 2.
- * @param lgMaxU The maximum number of uniques for this entire test specified as the
- * exponent of 2. The first trail set starts at uniques (u = 1).
- * @param ppo The number of Points Per Octave along the unique value number line
- * that will be used for generating trial sets. Recommended values are one point per octave
- * to 16 points per octave.
- */
- public void setTrialsProfile(int lgMinTrials, int lgMaxTrials, int lgMaxU, int ppo) {
- lgMinTrials_ = lgMinTrials;
- lgMaxTrials_ = lgMaxTrials;
- lgMaxU_ = lgMaxU;
- ppo_ = ppo;
- slope_ = (double)(lgMaxTrials - lgMinTrials) / (lgBP_ - lgMaxU_);
- }
-
- /**
- * Create (or reset) a sketch and perform uPerTrial updates then update the given Stats.
- * @param stats The given Stats object
- * @param uPerTrial the number of updates for this trial.
- */
- public void doTrial(Stats stats, int uPerTrial) {
- if (udSketch_ != null) { //UpdateSketch
- udSketch_.reset(); //reuse the same sketch
- long startUpdateTime_nS = System.nanoTime();
- for (int u=uPerTrial; u--> 0; ) { udSketch_.update(vIn_++); }
- long updateTime_nS = System.nanoTime() - startUpdateTime_nS;
- if (rebuild_) { udSketch_.rebuild(); } //Resizes down to k. Only useful with QuickSelectSketch
- stats.update(udSketch_, uPerTrial, updateTime_nS);
- }
- else { //HllSketch
- HllSketch hllSketch = hllBuilder_.build();
- long startUpdateTime_nS = System.nanoTime();
- for (int u=uPerTrial; u--> 0; ) hllSketch.update(new long[]{vIn_++});
- long updateTime_nS = System.nanoTime() - startUpdateTime_nS;
- stats.update(hllSketch, uPerTrial, updateTime_nS);
- }
- }
-
- /**
- * Computes the number of trials for a given current number of uniques for a trial set.
- * @param curU the given current number of uniques for a trial set.
- * @return the number of trials for a given current number of uniques for a trial set.
- */
- public int getTrials(int curU) {
- if ((lgMinTrials_ == lgMaxTrials_) || (curU <= (1 << lgBP_))) {
- return 1 << lgMaxTrials_;
- }
- double lgCurU = log(curU)/LN2;
- double lgTrials = slope_ * (lgCurU - lgBP_) + lgMaxTrials_;
- return (int) pow(2.0, lgTrials);
- }
-
- /**
- * Return the Log-base 2 of the configured nominal entries or k
- * @return the Log-base 2 of the configured nominal entries or k
- */
- public int getLgK() {
- return lgK_;
- }
-
- /**
- * Return the probability sampling rate, <i>p</i>.
- * @return the probability sampling rate, <i>p</i>.
- */
- public double getP() {
- return p_;
- }
-
- /**
- * Return the configured Points-Per-Octave.
- * @return the configured Points-Per-Octave.
- */
- public int getPPO() {
- return ppo_;
- }
-
- /**
- * Return true if sketch rebuild is requested to bring sketch size down to k, if necessary.
- * Only relevant for QuickSelectSketch.
- * @return true if sketch rebuild is requested to bring sketch size down to k, if necessary.
- */
- public boolean getRebuild() {
- return rebuild_;
- }
-
- /**
- * Returns the maximum generating index (gi) from the log_base2 of the maximum number of uniques
- * for the entire test run.
- * @return the maximum generating index (gi)
- */
- public int getMaximumGeneratingIndex() {
- return ppo_*lgMaxU_;
- }
-
- @Override
- public String toString() {
- return "Trials Profile: LgMinTrials: "+lgMinTrials_+", LgMaxTrials: "+lgMaxTrials_+
- ", lgMaxU: "+lgMaxU_+", PPO: "+ppo_+", Rebuild: "+rebuild_;
- }
-
-}
diff --git a/src/test/java/com/yahoo/sketches/theta/SetOperationTest.java b/src/test/java/com/yahoo/sketches/theta/SetOperationTest.java
index ebb4d8d..a8ab57f 100644
--- a/src/test/java/com/yahoo/sketches/theta/SetOperationTest.java
+++ b/src/test/java/com/yahoo/sketches/theta/SetOperationTest.java
@@ -328,10 +328,49 @@
assertTrue(SetOperation.isValidSetOpID(UNION.getID()));
assertTrue(SetOperation.isValidSetOpID(INTERSECTION.getID()));
assertTrue(SetOperation.isValidSetOpID(A_NOT_B.getID()));
-
}
@Test
+ public void setOpsExample() { //Usage example: prints the estimates only, asserts nothing
+ println("Set Operations Example:");
+ int k = 4096; //passed to every build(k) call and to buildUnion(k) below
+ UpdateSketch skA = Sketches.updateSketchBuilder().build(k);
+ UpdateSketch skB = Sketches.updateSketchBuilder().build(k);
+ UpdateSketch skC = Sketches.updateSketchBuilder().build(k);
+ //Feed three overlapping integer ranges into the sketches:
+ for (int i=1; i<=10; i++) { skA.update(i); } //A = 1..10
+ for (int i=1; i<=20; i++) { skB.update(i); } //B = 1..20, contains all of A
+ for (int i=6; i<=15; i++) { skC.update(i); } //C = 6..15, overlaps both A and B
+
+ Union union = Sketches.setOperationBuilder().buildUnion(k);
+ union.update(skA);
+ union.update(skB);
+ // ... continue to iterate on the input sketches to union
+
+ CompactSketch unionSk = union.getResult(); //the result union sketch
+ println("A U B : "+unionSk.getEstimate()); //the estimate of the union; the inputs hold 20 distinct values (1..20)
+
+ //Intersection is similar; note the union result sketch is reused here as an input
+
+ Intersection inter = Sketches.setOperationBuilder().buildIntersection();
+ inter.update(unionSk);
+ inter.update(skC);
+ // ... continue to iterate on the input sketches to intersect
+
+ CompactSketch interSk = inter.getResult(); //the result intersection sketch
+ println("(A U B) ^ C: "+interSk.getEstimate()); //the estimate of the intersection; the inputs share 10 values (6..15)
+
+ //The AnotB operation is a little different as it is stateless:
+
+ AnotB aNotB = Sketches.setOperationBuilder().buildANotB();
+ aNotB.update(skA, skC); //both operands are supplied in a single call
+
+ CompactSketch not = aNotB.getResult(); //the result AnotB sketch
+ println("A \\ C : "+not.getEstimate()); //the estimate of the AnotB operation; A minus C leaves 5 values (1..5)
+ }
+
+
+ @Test
public void printlnTest() {
println("PRINTING: "+this.getClass().getName());
}