blob: a980841b6cb0fe064a98e8a2f80cd1a433e57df6 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.datasketches.kll;
import static java.lang.Math.ceil;
import static org.apache.datasketches.kll.KllSketch.SketchStructure.*;
import static org.apache.datasketches.kll.KllSketch.SketchType.*;
import static org.apache.datasketches.quantilescommon.QuantileSearchCriteria.EXCLUSIVE;
import static org.apache.datasketches.quantilescommon.QuantileSearchCriteria.INCLUSIVE;
import static org.testng.Assert.assertEquals;
import static org.testng.Assert.assertFalse;
import static org.testng.Assert.assertNotNull;
import static org.testng.Assert.assertTrue;
import static org.testng.Assert.fail;
import java.util.Comparator;
import org.apache.datasketches.common.ArrayOfStringsSerDe;
import org.apache.datasketches.common.SketchesArgumentException;
import org.apache.datasketches.common.Util;
import org.apache.datasketches.kll.KllSketch.SketchType;
import org.apache.datasketches.memory.DefaultMemoryRequestServer;
import org.apache.datasketches.memory.Memory;
import org.apache.datasketches.memory.WritableMemory;
import org.apache.datasketches.quantilescommon.DoublesSortedView;
import org.apache.datasketches.quantilescommon.GenericSortedView;
import org.apache.datasketches.quantilescommon.GenericSortedViewIterator;
import org.testng.annotations.Test;
@SuppressWarnings("unused")
public class KllItemsSketchTest {
private static final double PMF_EPS_FOR_K_8 = 0.35; // PMF rank error (epsilon) for k=8
private static final double PMF_EPS_FOR_K_128 = 0.025; // PMF rank error (epsilon) for k=128
private static final double PMF_EPS_FOR_K_256 = 0.013; // PMF rank error (epsilon) for k=256
private static final double NUMERIC_NOISE_TOLERANCE = 1E-6;
private ArrayOfStringsSerDe serDe = new ArrayOfStringsSerDe();
@Test
public void empty() {
final KllItemsSketch<String> sketch = KllItemsSketch.newHeapInstance(Comparator.naturalOrder(), serDe);
sketch.update(null); // this must not change anything
assertTrue(sketch.isEmpty());
assertEquals(sketch.getN(), 0);
assertEquals(sketch.getNumRetained(), 0);
try { sketch.getRank("", INCLUSIVE); fail(); } catch (SketchesArgumentException e) {}
try { sketch.getMinItem(); fail(); } catch (SketchesArgumentException e) {}
try { sketch.getMaxItem(); fail(); } catch (SketchesArgumentException e) {}
try { sketch.getQuantile(0.5); fail(); } catch (SketchesArgumentException e) {}
try { sketch.getQuantiles(new double[] {0}); fail(); } catch (SketchesArgumentException e) {}
try { sketch.getPMF(new String[] {""}); fail(); } catch (SketchesArgumentException e) {}
try { sketch.getCDF(new String[] {""}); fail(); } catch (SketchesArgumentException e) {}
assertNotNull(sketch.toString(true, true));
assertNotNull(sketch.toString());
}
@Test(expectedExceptions = SketchesArgumentException.class)
public void getQuantileInvalidArg() {
final KllItemsSketch<String> sketch = KllItemsSketch.newHeapInstance(Comparator.naturalOrder(), serDe);
sketch.update("A");
sketch.getQuantile(-1.0);
}
@Test(expectedExceptions = SketchesArgumentException.class)
public void getQuantilesInvalidArg() {
final KllItemsSketch<String> sketch = KllItemsSketch.newHeapInstance(Comparator.naturalOrder(), serDe);
sketch.update("A");
sketch.getQuantiles(new double[] {2.0});
}
@Test
public void oneValue() {
final KllItemsSketch<String> sketch = KllItemsSketch.newHeapInstance(Comparator.naturalOrder(), serDe);
sketch.update("A");
assertFalse(sketch.isEmpty());
assertEquals(sketch.getN(), 1);
assertEquals(sketch.getNumRetained(), 1);
assertEquals(sketch.getRank("A", EXCLUSIVE), 0.0);
assertEquals(sketch.getRank("B", EXCLUSIVE), 1.0);
assertEquals(sketch.getRank("A", EXCLUSIVE), 0.0);
assertEquals(sketch.getRank("B", EXCLUSIVE), 1.0);
assertEquals(sketch.getRank("@", INCLUSIVE), 0.0);
assertEquals(sketch.getRank("A", INCLUSIVE), 1.0);
assertEquals(sketch.getMinItem(),"A");
assertEquals(sketch.getMaxItem(), "A");
assertEquals(sketch.getQuantile(0.5, EXCLUSIVE), "A");
assertEquals(sketch.getQuantile(0.5, INCLUSIVE), "A");
}
@Test
public void tenValues() {
final String[] tenStr = {"A","B","C","D","E","F","G","H","I","J"};
final KllItemsSketch<String> sketch = KllItemsSketch.newHeapInstance(20, Comparator.naturalOrder(), serDe);
final int strLen = tenStr.length;
final double dblStrLen = strLen;
for (int i = 1; i <= strLen; i++) { sketch.update(tenStr[i - 1]); }
assertFalse(sketch.isEmpty());
assertEquals(sketch.getN(), strLen);
assertEquals(sketch.getNumRetained(), strLen);
for (int i = 1; i <= strLen; i++) {
assertEquals(sketch.getRank(tenStr[i - 1], EXCLUSIVE), (i - 1) / dblStrLen);
assertEquals(sketch.getRank(tenStr[i - 1], INCLUSIVE), i / dblStrLen);
}
final String[] qArr = tenStr;
double[] rOut = sketch.getRanks(qArr); //inclusive
for (int i = 0; i < qArr.length; i++) {
assertEquals(rOut[i], (i + 1) / dblStrLen);
}
rOut = sketch.getRanks(qArr, EXCLUSIVE); //exclusive
for (int i = 0; i < qArr.length; i++) {
assertEquals(rOut[i], i / 10.0);
}
for (int i = 0; i <= strLen; i++) {
double rank = i/dblStrLen;
String q = rank == 1.0 ? tenStr[i-1] : tenStr[i];
assertEquals(sketch.getQuantile(rank, EXCLUSIVE), q);
q = rank == 0 ? tenStr[i] : tenStr[i - 1];
assertEquals(sketch.getQuantile(rank, INCLUSIVE), q); //ERROR
}
{
// getQuantile() and getQuantiles() equivalence EXCLUSIVE
final String[] quantiles =
sketch.getQuantiles(new double[] {0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0}, EXCLUSIVE);
for (int i = 0; i <= 10; i++) {
assertEquals(sketch.getQuantile(i / 10.0, EXCLUSIVE), quantiles[i]);
}
}
{
// getQuantile() and getQuantiles() equivalence INCLUSIVE
final String[] quantiles =
sketch.getQuantiles(new double[] {0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1}, INCLUSIVE);
for (int i = 0; i <= 10; i++) {
assertEquals(sketch.getQuantile(i / 10.0, INCLUSIVE), quantiles[i]);
}
}
}
@Test
public void manyValuesEstimationMode() {
final KllItemsSketch<String> sketch = KllItemsSketch.newHeapInstance(Comparator.naturalOrder(), serDe);
final int n = 1_000_000;
final int digits = Util.numDigits(n);
for (int i = 1; i <= n; i++) {
sketch.update(Util.intToFixedLengthString(i, digits));
assertEquals(sketch.getN(), i);
}
// test getRank
for (int i = 1; i <= n; i++) {
final double trueRank = (double) i / n;
String s = Util.intToFixedLengthString(i, digits);
double r = sketch.getRank(s);
assertEquals(r, trueRank, PMF_EPS_FOR_K_256, "for value " + s);
}
// test getPMF
String s = Util.intToFixedLengthString(n/2, digits);
final double[] pmf = sketch.getPMF(new String[] {s}); // split at median
assertEquals(pmf.length, 2);
assertEquals(pmf[0], 0.5, PMF_EPS_FOR_K_256);
assertEquals(pmf[1], 0.5, PMF_EPS_FOR_K_256);
assertEquals(sketch.getMinItem(), Util.intToFixedLengthString(1, digits));
assertEquals(sketch.getMaxItem(), Util.intToFixedLengthString(n, digits));
// check at every 0.1 percentage point
final double[] fractions = new double[1001];
final double[] reverseFractions = new double[1001]; // check that ordering doesn't matter
for (int i = 0; i <= 1000; i++) {
fractions[i] = (double) i / 1000;
reverseFractions[1000 - i] = fractions[i];
}
final String[] quantiles = sketch.getQuantiles(fractions);
final String[] reverseQuantiles = sketch.getQuantiles(reverseFractions);
String previousQuantile = "";
for (int i = 0; i <= 1000; i++) {
final String quantile = sketch.getQuantile(fractions[i]);
assertEquals(quantile, quantiles[i]);
assertEquals(quantile, reverseQuantiles[1000 - i]);
assertTrue(Util.le(previousQuantile, quantile, Comparator.naturalOrder()));
previousQuantile = quantile;
}
}
@Test
public void getRankGetCdfGetPmfConsistency() {
final KllItemsSketch<String> sketch = KllItemsSketch.newHeapInstance(Comparator.naturalOrder(), serDe);
final int n = 1000;
final int digits = Util.numDigits(n);
final String[] quantiles = new String[n];
for (int i = 0; i < n; i++) {
final String str = Util.intToFixedLengthString(i, digits);
sketch.update(str);
quantiles[i] = str;
}
{ //EXCLUSIVE
final double[] ranks = sketch.getCDF(quantiles, EXCLUSIVE);
final double[] pmf = sketch.getPMF(quantiles, EXCLUSIVE);
double sumPmf = 0;
for (int i = 0; i < n; i++) {
assertEquals(ranks[i], sketch.getRank(quantiles[i], EXCLUSIVE), NUMERIC_NOISE_TOLERANCE, "rank vs CDF for value " + i);
sumPmf += pmf[i];
assertEquals(ranks[i], sumPmf, NUMERIC_NOISE_TOLERANCE, "CDF vs PMF for value " + i);
}
sumPmf += pmf[n];
assertEquals(sumPmf, 1.0, NUMERIC_NOISE_TOLERANCE);
assertEquals(ranks[n], 1.0, NUMERIC_NOISE_TOLERANCE);
}
{ // INCLUSIVE (default)
final double[] ranks = sketch.getCDF(quantiles, INCLUSIVE);
final double[] pmf = sketch.getPMF(quantiles, INCLUSIVE);
double sumPmf = 0;
for (int i = 0; i < n; i++) {
assertEquals(ranks[i], sketch.getRank(quantiles[i], INCLUSIVE), NUMERIC_NOISE_TOLERANCE,
"rank vs CDF for value " + i);
sumPmf += pmf[i];
assertEquals(ranks[i], sumPmf, NUMERIC_NOISE_TOLERANCE, "CDF vs PMF for value " + i);
}
sumPmf += pmf[n];
assertEquals(sumPmf, 1.0, NUMERIC_NOISE_TOLERANCE);
assertEquals(ranks[n], 1.0, NUMERIC_NOISE_TOLERANCE);
}
}
@Test
public void merge() {
final KllItemsSketch<String> sketch1 = KllItemsSketch.newHeapInstance(Comparator.naturalOrder(), serDe);
final KllItemsSketch<String> sketch2 = KllItemsSketch.newHeapInstance(Comparator.naturalOrder(), serDe);
final int n = 10000;
final int digits = Util.numDigits(2 * n);
for (int i = 0; i < n; i++) {
sketch1.update(Util.intToFixedLengthString(i, digits));
sketch2.update(Util.intToFixedLengthString(2 * n - i - 1, digits));
}
assertEquals(sketch1.getMinItem(), Util.intToFixedLengthString(0, digits));
assertEquals(sketch1.getMaxItem(), Util.intToFixedLengthString(n - 1, digits));
assertEquals(sketch2.getMinItem(), Util.intToFixedLengthString(n, digits));
assertEquals(sketch2.getMaxItem(), Util.intToFixedLengthString(2 * n - 1, digits));
sketch1.merge(sketch2);
assertFalse(sketch1.isEmpty());
assertEquals(sketch1.getN(), 2L * n);
assertEquals(sketch1.getMinItem(), Util.intToFixedLengthString(0, digits));
assertEquals(sketch1.getMaxItem(), Util.intToFixedLengthString(2 * n - 1, digits));
String upperBound = Util.intToFixedLengthString(n + (int)ceil(n * PMF_EPS_FOR_K_256), digits);
String lowerBound = Util.intToFixedLengthString(n - (int)ceil(n * PMF_EPS_FOR_K_256), digits);
String median = sketch1.getQuantile(0.5);
assertTrue(Util.le(median, upperBound, Comparator.naturalOrder()));
assertTrue(Util.le(lowerBound, median, Comparator.naturalOrder()));
}
@Test
public void mergeLowerK() {
final KllItemsSketch<String> sketch1 = KllItemsSketch.newHeapInstance(256, Comparator.naturalOrder(), serDe);
final KllItemsSketch<String> sketch2 = KllItemsSketch.newHeapInstance(128, Comparator.naturalOrder(), serDe);
final int n = 10000;
final int digits = Util.numDigits(2 * n);
for (int i = 0; i < n; i++) {
sketch1.update(Util.intToFixedLengthString(i, digits));
sketch2.update(Util.intToFixedLengthString(2 * n - i - 1, digits));
}
assertEquals(sketch1.getMinItem(), Util.intToFixedLengthString(0, digits));
assertEquals(sketch1.getMaxItem(), Util.intToFixedLengthString(n - 1, digits));
assertEquals(sketch2.getMinItem(), Util.intToFixedLengthString(n, digits));
assertEquals(sketch2.getMaxItem(), Util.intToFixedLengthString(2 * n - 1, digits));
assertTrue(sketch1.getNormalizedRankError(false) < sketch2.getNormalizedRankError(false));
assertTrue(sketch1.getNormalizedRankError(true) < sketch2.getNormalizedRankError(true));
sketch1.merge(sketch2);
// sketch1 must get "contaminated" by the lower K in sketch2
assertEquals(sketch1.getNormalizedRankError(false), sketch2.getNormalizedRankError(false));
assertEquals(sketch1.getNormalizedRankError(true), sketch2.getNormalizedRankError(true));
assertFalse(sketch1.isEmpty());
assertEquals(sketch1.getN(), 2 * n);
assertEquals(sketch1.getMinItem(), Util.intToFixedLengthString(0, digits));
assertEquals(sketch1.getMaxItem(), Util.intToFixedLengthString(2 * n - 1, digits));
String upperBound = Util.intToFixedLengthString(n + (int)ceil(2 * n * PMF_EPS_FOR_K_128), digits);
String lowerBound = Util.intToFixedLengthString(n - (int)ceil(2 * n * PMF_EPS_FOR_K_128), digits);
String median = sketch1.getQuantile(0.5);
assertTrue(Util.le(median, upperBound, Comparator.naturalOrder()));
assertTrue(Util.le(lowerBound, median, Comparator.naturalOrder()));
}
@Test
public void mergeEmptyLowerK() {
final KllItemsSketch<String> sketch1 = KllItemsSketch.newHeapInstance(256, Comparator.naturalOrder(), serDe);
final KllItemsSketch<String> sketch2 = KllItemsSketch.newHeapInstance(128, Comparator.naturalOrder(), serDe);
final int n = 10000;
final int digits = Util.numDigits(n);
for (int i = 0; i < n; i++) {
sketch1.update(Util.intToFixedLengthString(i, digits)); //sketch2 is empty
}
// rank error should not be affected by a merge with an empty sketch with lower K
final double rankErrorBeforeMerge = sketch1.getNormalizedRankError(true);
sketch1.merge(sketch2);
assertEquals(sketch1.getNormalizedRankError(true), rankErrorBeforeMerge);
{
assertFalse(sketch1.isEmpty());
assertTrue(sketch2.isEmpty());
assertEquals(sketch1.getN(), n);
assertEquals(sketch1.getMinItem(), Util.intToFixedLengthString(0, digits));
assertEquals(sketch1.getMaxItem(), Util.intToFixedLengthString(n - 1, digits));
String upperBound = Util.intToFixedLengthString(n / 2 + (int)ceil(n * PMF_EPS_FOR_K_256), digits);
String lowerBound = Util.intToFixedLengthString(n / 2 - (int)ceil(n * PMF_EPS_FOR_K_256), digits);
String median = sketch1.getQuantile(0.5);
assertTrue(Util.le(median, upperBound, Comparator.naturalOrder()));
assertTrue(Util.le(lowerBound, median, Comparator.naturalOrder()));
}
{
//merge the other way
sketch2.merge(sketch1);
assertFalse(sketch1.isEmpty());
assertFalse(sketch2.isEmpty());
assertEquals(sketch1.getN(), n);
assertEquals(sketch2.getN(), n);
assertEquals(sketch1.getMinItem(), Util.intToFixedLengthString(0, digits));
assertEquals(sketch1.getMaxItem(), Util.intToFixedLengthString(n - 1, digits));
assertEquals(sketch2.getMinItem(), Util.intToFixedLengthString(0, digits));
assertEquals(sketch2.getMaxItem(), Util.intToFixedLengthString(n - 1, digits));
String upperBound = Util.intToFixedLengthString(n / 2 + (int)ceil(n * PMF_EPS_FOR_K_128), digits);
String lowerBound = Util.intToFixedLengthString(n / 2 - (int)ceil(n * PMF_EPS_FOR_K_128), digits);
String median = sketch2.getQuantile(0.5);
assertTrue(Util.le(median, upperBound, Comparator.naturalOrder()));
assertTrue(Util.le(lowerBound, median, Comparator.naturalOrder()));
}
}
@Test
public void mergeExactModeLowerK() {
final KllItemsSketch<String> sketch1 = KllItemsSketch.newHeapInstance(256, Comparator.naturalOrder(), serDe);
final KllItemsSketch<String> sketch2 = KllItemsSketch.newHeapInstance(128, Comparator.naturalOrder(), serDe);
final int n = 10000;
final int digits = Util.numDigits(n);
for (int i = 0; i < n; i++) {
sketch1.update(Util.intToFixedLengthString(i, digits));
}
sketch2.update(Util.intToFixedLengthString(1, digits));
// rank error should not be affected by a merge with a sketch in exact mode with lower K
final double rankErrorBeforeMerge = sketch1.getNormalizedRankError(true);
sketch1.merge(sketch2);
assertEquals(sketch1.getNormalizedRankError(true), rankErrorBeforeMerge);
}
@Test
public void mergeMinMinValueFromOther() {
final KllItemsSketch<String> sketch1 = KllItemsSketch.newHeapInstance(Comparator.naturalOrder(), serDe);
final KllItemsSketch<String> sketch2 = KllItemsSketch.newHeapInstance(Comparator.naturalOrder(), serDe);
sketch1.update(Util.intToFixedLengthString(1, 1));
sketch2.update(Util.intToFixedLengthString(2, 1));
sketch2.merge(sketch1);
assertEquals(sketch2.getMinItem(), Util.intToFixedLengthString(1, 1));
}
@Test
public void mergeMinAndMaxFromOther() {
final KllItemsSketch<String> sketch1 = KllItemsSketch.newHeapInstance(Comparator.naturalOrder(), serDe);
final KllItemsSketch<String> sketch2 = KllItemsSketch.newHeapInstance(10, Comparator.naturalOrder(), serDe);
final int n = 1_000_000;
final int digits = Util.numDigits(n);
for (int i = 1; i <= 1_000_000; i++) {
sketch1.update(Util.intToFixedLengthString(i, digits)); //sketch2 is empty
}
sketch2.merge(sketch1);
assertEquals(sketch2.getMinItem(), Util.intToFixedLengthString(1, digits));
assertEquals(sketch2.getMaxItem(), Util.intToFixedLengthString(n, digits));
}
@Test(expectedExceptions = SketchesArgumentException.class)
public void kTooSmall() {
KllItemsSketch.newHeapInstance(KllSketch.DEFAULT_M - 1, Comparator.naturalOrder(), serDe);
}
@Test(expectedExceptions = SketchesArgumentException.class)
public void kTooLarge() {
KllItemsSketch.newHeapInstance(KllSketch.MAX_K + 1, Comparator.naturalOrder(), serDe);
}
@Test
public void minK() {
final KllItemsSketch<String> sketch =
KllItemsSketch.newHeapInstance(KllSketch.DEFAULT_M,Comparator.naturalOrder(), serDe);
final int n = 1000;
final int digits = Util.numDigits(n);
for (int i = 0; i < n; i++) {
sketch.update(Util.intToFixedLengthString(i, digits));
}
assertEquals(sketch.getK(), KllSketch.DEFAULT_M);
String upperBound = Util.intToFixedLengthString(n / 2 + (int)ceil(n * PMF_EPS_FOR_K_8), digits);
String lowerBound = Util.intToFixedLengthString(n / 2 - (int)ceil(n * PMF_EPS_FOR_K_8), digits);
String median = sketch.getQuantile(0.5);
assertTrue(Util.le(median, upperBound, Comparator.naturalOrder()));
assertTrue(Util.le(lowerBound, median, Comparator.naturalOrder()));
}
@Test
public void maxK() {
final KllItemsSketch<String> sketch =
KllItemsSketch.newHeapInstance(KllSketch.MAX_K,Comparator.naturalOrder(), serDe);
final int n = 1000;
final int digits = Util.numDigits(n);
for (int i = 0; i < n; i++) {
sketch.update(Util.intToFixedLengthString(i, digits));
}
assertEquals(sketch.getK(), KllSketch.MAX_K);
String upperBound = Util.intToFixedLengthString(n / 2 + (int)ceil(n * PMF_EPS_FOR_K_256), digits);
String lowerBound = Util.intToFixedLengthString(n / 2 - (int)ceil(n * PMF_EPS_FOR_K_256), digits);
String median = sketch.getQuantile(0.5);
assertTrue(Util.le(median, upperBound, Comparator.naturalOrder()));
assertTrue(Util.le(lowerBound, median, Comparator.naturalOrder()));
}
@Test(expectedExceptions = SketchesArgumentException.class)
public void outOfOrderSplitPoints() {
final KllItemsSketch<String> sketch = KllItemsSketch.newHeapInstance(Comparator.naturalOrder(), serDe);
final String s0 = Util.intToFixedLengthString(0, 1);
final String s1 = Util.intToFixedLengthString(1, 1);
sketch.update(s0);
sketch.getCDF(new String[] {s1, s0});
}
@Test(expectedExceptions = SketchesArgumentException.class)
public void nullSplitPoint() {
final KllItemsSketch<String> sketch = KllItemsSketch.newHeapInstance(Comparator.naturalOrder(), serDe);
sketch.update(Util.intToFixedLengthString(0, 1));
sketch.getCDF(new String[] {null});
}
@Test
public void getQuantiles() {
final KllItemsSketch<String> sketch = KllItemsSketch.newHeapInstance(Comparator.naturalOrder(), serDe);
sketch.update("A");
sketch.update("B");
sketch.update("C");
sketch.update("D");
String[] quantiles1 = sketch.getQuantiles(new double[] {0.0, 0.5, 1.0}, EXCLUSIVE);
String[] quantiles2 = sketch.getPartitionBoundaries(2, EXCLUSIVE).boundaries;
assertEquals(quantiles1, quantiles2);
quantiles1 = sketch.getQuantiles(new double[] {0.0, 0.5, 1.0}, INCLUSIVE);
quantiles2 = sketch.getPartitionBoundaries(2, INCLUSIVE).boundaries;
assertEquals(quantiles1, quantiles2);
}
@Test
public void checkReset() {
final KllItemsSketch<String> sketch = KllItemsSketch.newHeapInstance(20, Comparator.naturalOrder(), serDe);
final int n = 100;
final int digits = Util.numDigits(n);
for (int i = 1; i <= n; i++) { sketch.update(Util.intToFixedLengthString(i, digits)); }
long n1 = sketch.getN();
String min1 = sketch.getMinItem();
String max1 = sketch.getMaxItem();
sketch.reset();
for (int i = 1; i <= 100; i++) { sketch.update(Util.intToFixedLengthString(i, digits)); }
long n2 = sketch.getN();
String min2 = sketch.getMinItem();
String max2 = sketch.getMaxItem();
assertEquals(n2, n1);
assertEquals(min2, min1);
assertEquals(max2, max1);
}
@Test
public void checkReadOnlyUpdate() {
KllItemsSketch<String> sk1 = KllItemsSketch.newHeapInstance(20, Comparator.naturalOrder(), serDe);
Memory mem = Memory.wrap(sk1.toByteArray());
KllItemsSketch<String> sk2 = KllItemsSketch.wrap(mem, Comparator.naturalOrder(), serDe);
try { sk2.update("A"); fail(); } catch (SketchesArgumentException e) { }
}
@Test
public void checkNewDirectInstanceAndSmallSize() {
KllItemsSketch<String> sk1 = KllItemsSketch.newHeapInstance(20, Comparator.naturalOrder(), serDe);
Memory mem = Memory.wrap(sk1.toByteArray());
KllItemsSketch<String> sk2 = KllItemsSketch.wrap(mem, Comparator.naturalOrder(), serDe);
int sizeBytes = sk2.currentSerializedSizeBytes(false);
assertEquals(sizeBytes, 8);
sk1.update("A");
mem = Memory.wrap(sk1.toByteArray());
sk2 = KllItemsSketch.wrap(mem, Comparator.naturalOrder(), serDe);
sizeBytes = sk2.currentSerializedSizeBytes(false);
assertEquals(sizeBytes, 8 + 5);
sk1.update("B");
mem = Memory.wrap(sk1.toByteArray());
sk2 = KllItemsSketch.wrap(mem, Comparator.naturalOrder(), serDe);
sizeBytes = sk2.currentSerializedSizeBytes(false);
assertEquals(sizeBytes, 20 + 4 + 2 * 5 + 2 * 5);
}
@Test
public void sortedView() {
final KllItemsSketch<String> sk = KllItemsSketch.newHeapInstance(20, Comparator.naturalOrder(), serDe);
sk.update("A");
sk.update("AB");
sk.update("ABC");
final GenericSortedView<String> view = sk.getSortedView();
final GenericSortedViewIterator<String> itr = view.iterator();
assertEquals(itr.next(), true);
assertEquals(itr.getQuantile(), "A");
assertEquals(itr.getWeight(), 1);
assertEquals(itr.getCumulativeWeight(EXCLUSIVE), 0);
assertEquals(itr.getCumulativeWeight(INCLUSIVE), 1);
assertEquals(itr.next(), true);
assertEquals(itr.getQuantile(), "AB");
assertEquals(itr.getWeight(), 1);
assertEquals(itr.getCumulativeWeight(EXCLUSIVE), 1);
assertEquals(itr.getCumulativeWeight(INCLUSIVE), 2);
assertEquals(itr.next(), true);
assertEquals(itr.getQuantile(), "ABC");
assertEquals(itr.getWeight(), 1);
assertEquals(itr.getCumulativeWeight(EXCLUSIVE), 2);
assertEquals(itr.getCumulativeWeight(INCLUSIVE), 3);
assertEquals(itr.next(), false);
}
@Test //also visual
public void checkCDF_PDF() {
final double[] cdfI = {.25, .50, .75, 1.0, 1.0 };
final double[] cdfE = {0.0, .25, .50, .75, 1.0 };
final double[] pmfI = {.25, .25, .25, .25, 0.0 };
final double[] pmfE = {0.0, .25, .25, .25, .25 };
final double toll = 1E-10;
final KllItemsSketch<String> sketch = KllItemsSketch.newHeapInstance(20, Comparator.naturalOrder(), serDe);
final String[] strIn = {"A", "AB", "ABC", "ABCD"};
for (int i = 0; i < strIn.length; i++) { sketch.update(strIn[i]); }
String[] sp = new String[] {"A", "AB", "ABC", "ABCD"};
println("SplitPoints:");
for (int i = 0; i < sp.length; i++) {
printf("%10s", sp[i]);
}
println("");
println("INCLUSIVE:");
double[] cdf = sketch.getCDF(sp, INCLUSIVE);
double[] pmf = sketch.getPMF(sp, INCLUSIVE);
printf("%10s%10s\n", "CDF", "PMF");
for (int i = 0; i < cdf.length; i++) {
printf("%10.2f%10.2f\n", cdf[i], pmf[i]);
assertEquals(cdf[i], cdfI[i], toll);
assertEquals(pmf[i], pmfI[i], toll);
}
println("EXCLUSIVE");
cdf = sketch.getCDF(sp, EXCLUSIVE);
pmf = sketch.getPMF(sp, EXCLUSIVE);
printf("%10s%10s\n", "CDF", "PMF");
for (int i = 0; i < cdf.length; i++) {
printf("%10.2f%10.2f\n", cdf[i], pmf[i]);
assertEquals(cdf[i], cdfE[i], toll);
assertEquals(pmf[i], pmfE[i], toll);
}
}
@Test
public void checkWrapCase1Floats() {
KllItemsSketch<String> sk = KllItemsSketch.newHeapInstance(20, Comparator.naturalOrder(), serDe);
final int n = 21;
final int digits = Util.numDigits(n);
for (int i = 1; i <= n; i++) { sk.update(Util.intToFixedLengthString(i, digits)); }
Memory mem = Memory.wrap(sk.toByteArray());
KllItemsSketch<String> sk2 = KllItemsSketch.wrap(mem, Comparator.naturalOrder(), serDe);
assertTrue(mem.isReadOnly());
assertTrue(sk2.isReadOnly());
assertFalse(sk2.isDirect());
}
@Test
public void checkReadOnlyExceptions() {
int[] intArr = new int[0];
int intV = 2;
int idx = 1;
KllItemsSketch<String> sk1 = KllItemsSketch.newHeapInstance(20, Comparator.naturalOrder(), serDe);
Memory mem = Memory.wrap(sk1.toByteArray());
KllItemsSketch<String> sk2 = KllItemsSketch.wrap(mem, Comparator.naturalOrder(), serDe);
try { sk2.setLevelsArray(intArr); fail(); } catch (SketchesArgumentException e) { }
try { sk2.setLevelsArrayAt(idx,intV); fail(); } catch (SketchesArgumentException e) { }
}
@Test
public void checkIsSameResource() {
int cap = 128;
WritableMemory wmem = WritableMemory.allocate(cap);
WritableMemory reg1 = wmem.writableRegion(0, 64);
WritableMemory reg2 = wmem.writableRegion(64, 64);
assertFalse(reg1 == reg2);
assertFalse(reg1.isSameResource(reg2));
WritableMemory reg3 = wmem.writableRegion(0, 64);
assertFalse(reg1 == reg3);
assertTrue(reg1.isSameResource(reg3));
byte[] byteArr1 = KllItemsSketch.newHeapInstance(20, Comparator.naturalOrder(), serDe).toByteArray();
reg1.putByteArray(0, byteArr1, 0, byteArr1.length);
KllItemsSketch<String> sk1 = KllItemsSketch.wrap(reg1, Comparator.naturalOrder(), serDe);
byte[] byteArr2 = KllItemsSketch.newHeapInstance(20, Comparator.naturalOrder(), serDe).toByteArray();
reg2.putByteArray(0, byteArr2, 0, byteArr2.length);
assertFalse(sk1.isSameResource(reg2));
byte[] byteArr3 = KllItemsSketch.newHeapInstance(20, Comparator.naturalOrder(), serDe).toByteArray();
reg3.putByteArray(0, byteArr3, 0, byteArr3.length);
assertTrue(sk1.isSameResource(reg3));
}
// New added tests specially for KllItemsSketch
@Test
public void checkHeapifyEmpty() {
final KllItemsSketch<String> sk1 = KllItemsSketch.newHeapInstance(20, Comparator.naturalOrder(), serDe);
Memory mem = Memory.wrap(sk1.toByteArray());
KllMemoryValidate memVal = new KllMemoryValidate(mem, SketchType.ITEMS_SKETCH, serDe);
assertEquals(memVal.sketchStructure, COMPACT_EMPTY);
assertEquals(mem.getCapacity(), 8);
final KllItemsSketch<String> sk2 = KllItemsSketch.heapify(mem, Comparator.naturalOrder(), serDe);
assertEquals(sk2.sketchStructure, UPDATABLE);
assertEquals(sk2.getN(), 0);
assertFalse(sk2.isReadOnly());
try { sk2.getMinItem(); fail(); } catch (SketchesArgumentException e) { }
try { sk2.getMaxItem(); fail(); } catch (SketchesArgumentException e) { }
println(sk1.toString(true, true));
println("");
println(KllPreambleUtil.toString(mem, ITEMS_SKETCH, true, serDe));
}
@Test
public void checkHeapifySingleItem() {
final KllItemsSketch<String> sk1 = KllItemsSketch.newHeapInstance(20, Comparator.naturalOrder(), serDe);
sk1.update("A");
Memory mem = Memory.wrap(sk1.toByteArray());
KllMemoryValidate memVal = new KllMemoryValidate(mem, SketchType.ITEMS_SKETCH, serDe);
assertEquals(memVal.sketchStructure, COMPACT_SINGLE);
assertEquals(mem.getCapacity(), memVal.sketchBytes);
final KllItemsSketch<String> sk2 = KllItemsSketch.heapify(mem, Comparator.naturalOrder(), serDe);
assertEquals(sk2.sketchStructure, UPDATABLE);
assertEquals(sk2.getN(), 1);
assertFalse(sk2.isReadOnly());
assertEquals(sk2.getMinItem(), "A");
assertEquals(sk2.getMaxItem(), "A");
println(sk1.toString(true, true));
println("");
println(KllPreambleUtil.toString(mem, ITEMS_SKETCH, true, serDe));
}
@Test
public void checkHeapifyFewItems() {
final KllItemsSketch<String> sk1 = KllItemsSketch.newHeapInstance(20, Comparator.naturalOrder(), serDe);
sk1.update("A");
sk1.update("AB");
sk1.update("ABC");
Memory mem = Memory.wrap(sk1.toByteArray());
KllMemoryValidate memVal = new KllMemoryValidate(mem, SketchType.ITEMS_SKETCH, serDe);
assertEquals(memVal.sketchStructure, COMPACT_FULL);
assertEquals(mem.getCapacity(), memVal.sketchBytes);
println(sk1.toString(true, true));
println("");
println(KllPreambleUtil.toString(mem, ITEMS_SKETCH, true, serDe));
}
@Test
public void checkHeapifyManyItems() {
final KllItemsSketch<String> sk1 = KllItemsSketch.newHeapInstance(20, Comparator.naturalOrder(), serDe);
final int n = 109;
final int digits = Util.numDigits(n);
for (int i = 1; i <= n; i++) {
sk1.update(Util.intToFixedLengthString(i, digits));
}
Memory mem = Memory.wrap(sk1.toByteArray());
KllMemoryValidate memVal = new KllMemoryValidate(mem, SketchType.ITEMS_SKETCH, serDe);
assertEquals(memVal.sketchStructure, COMPACT_FULL);
assertEquals(mem.getCapacity(), memVal.sketchBytes);
println(sk1.toString(true, true));
println("");
println(KllPreambleUtil.toString(mem, ITEMS_SKETCH, true, serDe));
}
@Test
public void checkWrapCausingLevelsCompaction() {
final KllItemsSketch<String> sk1 = KllItemsSketch.newHeapInstance(20, Comparator.naturalOrder(), serDe);
final int n = 109;
final int digits = Util.numDigits(n);
for (int i = 1; i <= n; i++) {
sk1.update(Util.intToFixedLengthString(i, digits));
}
Memory mem = Memory.wrap(sk1.toByteArray());
KllItemsSketch<String> sk2 = KllItemsSketch.wrap(mem, Comparator.naturalOrder(), serDe);
assertTrue(mem.isReadOnly());
assertTrue(sk2.isReadOnly());
assertFalse(sk2.isDirect()); //not off-heap
println(sk1.toString(true, true));
println("");
println(sk2.toString(true, true));
println("");
println(KllPreambleUtil.toString(mem, ITEMS_SKETCH, true, serDe));
}
@Test
public void checkExceptions() {
final KllItemsSketch<String> sk = KllItemsSketch.newHeapInstance(20, Comparator.naturalOrder(), serDe);
try { sk.getTotalItemsByteArr(); fail(); } catch (SketchesArgumentException e) { }
try { sk.getTotalItemsNumBytes(); fail(); } catch (SketchesArgumentException e) { }
try { sk.setWritableMemory(null); fail(); } catch (SketchesArgumentException e) { }
byte[] byteArr = sk.toByteArray();
final KllItemsSketch<String> sk2 = KllItemsSketch.wrap(Memory.wrap(byteArr), Comparator.naturalOrder(), serDe);
try { sk2.incN(); fail(); } catch (SketchesArgumentException e) { }
try { sk2.setItemsArray(null); fail(); } catch (SketchesArgumentException e) { }
try { sk2.setItemsArrayAt(0, null); fail(); } catch (SketchesArgumentException e) { }
try { sk2.setLevelZeroSorted(false); fail(); } catch (SketchesArgumentException e) { }
try { sk2.setMaxItem(null); fail(); } catch (SketchesArgumentException e) { }
try { sk2.setMinItem(null); fail(); } catch (SketchesArgumentException e) { }
try { sk2.setMinK(0); fail(); } catch (SketchesArgumentException e) { }
try { sk2.setN(0); fail(); } catch (SketchesArgumentException e) { }
}
@Test
public void checkSortedViewAfterReset() {
KllItemsSketch<String> sk = KllItemsSketch.newHeapInstance(20, Comparator.naturalOrder(), serDe);
sk.update("1");
GenericSortedView<String> sv = sk.getSortedView();
String ssv = sv.getQuantile(1.0, INCLUSIVE);
assertEquals(ssv, "1");
sk.reset();
try { sk.getSortedView(); fail(); } catch (SketchesArgumentException e) { }
}
private final static boolean enablePrinting = false;
/**
* @param format the format
* @param args the args
*/
private static final void printf(final String format, final Object ...args) {
if (enablePrinting) { System.out.printf(format, args); }
}
/**
* @param o the Object to println
*/
private static final void println(final Object o) {
if (enablePrinting) { System.out.println(o.toString()); }
}
}