blob: a8ab1bd9cc317ddd2f7706c6d6d25bb326e39dc3 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.datasketches.kll;
import static org.apache.datasketches.Util.getResourceBytes;
import static org.testng.Assert.assertEquals;
import static org.testng.Assert.assertFalse;
import static org.testng.Assert.assertNotNull;
import static org.testng.Assert.assertNull;
import static org.testng.Assert.assertTrue;
import org.apache.datasketches.SketchesArgumentException;
import org.apache.datasketches.memory.Memory;
import org.testng.annotations.Test;
@SuppressWarnings("javadoc")
public class KllFloatsSketchTest {
private static final double PMF_EPS_FOR_K_8 = 0.35; // PMF rank error (epsilon) for k=8
private static final double PMF_EPS_FOR_K_128 = 0.025; // PMF rank error (epsilon) for k=128
private static final double PMF_EPS_FOR_K_256 = 0.013; // PMF rank error (epsilon) for k=256
private static final double NUMERIC_NOISE_TOLERANCE = 1E-6;
@Test
public void empty() {
final KllFloatsSketch sketch = new KllFloatsSketch();
sketch.update(Float.NaN); // this must not change anything
assertTrue(sketch.isEmpty());
assertEquals(sketch.getN(), 0);
assertEquals(sketch.getNumRetained(), 0);
assertTrue(Double.isNaN(sketch.getRank(0)));
assertTrue(Float.isNaN(sketch.getMinValue()));
assertTrue(Float.isNaN(sketch.getMaxValue()));
assertTrue(Float.isNaN(sketch.getQuantile(0.5)));
assertNull(sketch.getQuantiles(new double[] {0}));
assertNull(sketch.getPMF(new float[] {0}));
assertNotNull(sketch.toString(true, true));
assertNotNull(sketch.toString());
}
@Test(expectedExceptions = SketchesArgumentException.class)
public void getQuantileInvalidArg() {
final KllFloatsSketch sketch = new KllFloatsSketch();
sketch.update(1);
sketch.getQuantile(-1.0);
}
@Test(expectedExceptions = SketchesArgumentException.class)
public void getQuantilesInvalidArg() {
final KllFloatsSketch sketch = new KllFloatsSketch();
sketch.update(1);
sketch.getQuantiles(new double[] {2.0});
}
@Test
public void oneItem() {
final KllFloatsSketch sketch = new KllFloatsSketch();
sketch.update(1);
assertFalse(sketch.isEmpty());
assertEquals(sketch.getN(), 1);
assertEquals(sketch.getNumRetained(), 1);
assertEquals(sketch.getRank(1), 0.0);
assertEquals(sketch.getRank(2), 1.0);
assertEquals(sketch.getMinValue(), 1f);
assertEquals(sketch.getMaxValue(), 1f);
assertEquals(sketch.getQuantile(0.5), 1f);
}
@Test
public void manyItemsEstimationMode() {
final KllFloatsSketch sketch = new KllFloatsSketch();
final int n = 1000000;
for (int i = 0; i < n; i++) {
sketch.update(i);
assertEquals(sketch.getN(), i + 1);
}
// test getRank
for (int i = 0; i < n; i++) {
final double trueRank = (double) i / n;
assertEquals(sketch.getRank(i), trueRank, PMF_EPS_FOR_K_256, "for value " + i);
}
// test getPMF
final double[] pmf = sketch.getPMF(new float[] {n / 2}); // split at median
assertEquals(pmf.length, 2);
assertEquals(pmf[0], 0.5, PMF_EPS_FOR_K_256);
assertEquals(pmf[1], 0.5, PMF_EPS_FOR_K_256);
assertEquals(sketch.getMinValue(), 0f); // min value is exact
assertEquals(sketch.getQuantile(0), 0f); // min value is exact
assertEquals(sketch.getMaxValue(), n - 1f); // max value is exact
assertEquals(sketch.getQuantile(1), n - 1f); // max value is exact
// check at every 0.1 percentage point
final double[] fractions = new double[1001];
final double[] reverseFractions = new double[1001]; // check that ordering doesn't matter
for (int i = 0; i <= 1000; i++) {
fractions[i] = (double) i / 1000;
reverseFractions[1000 - i] = fractions[i];
}
final float[] quantiles = sketch.getQuantiles(fractions);
final float[] reverseQuantiles = sketch.getQuantiles(reverseFractions);
float previousQuantile = 0;
for (int i = 0; i <= 1000; i++) {
final float quantile = sketch.getQuantile(fractions[i]);
assertEquals(quantile, quantiles[i]);
assertEquals(quantile, reverseQuantiles[1000 - i]);
assertTrue(previousQuantile <= quantile);
previousQuantile = quantile;
}
}
@Test
public void getRankGetCdfGetPmfConsistency() {
final KllFloatsSketch sketch = new KllFloatsSketch();
final int n = 1000;
final float[] values = new float[n];
for (int i = 0; i < n; i++) {
sketch.update(i);
values[i] = i;
}
final double[] ranks = sketch.getCDF(values);
final double[] pmf = sketch.getPMF(values);
double sumPmf = 0;
for (int i = 0; i < n; i++) {
assertEquals(ranks[i], sketch.getRank(values[i]), NUMERIC_NOISE_TOLERANCE,
"rank vs CDF for value " + i);
sumPmf += pmf[i];
assertEquals(ranks[i], sumPmf, NUMERIC_NOISE_TOLERANCE, "CDF vs PMF for value " + i);
}
sumPmf += pmf[n];
assertEquals(sumPmf, 1.0, NUMERIC_NOISE_TOLERANCE);
assertEquals(ranks[n], 1.0, NUMERIC_NOISE_TOLERANCE);
}
@Test
public void merge() {
final KllFloatsSketch sketch1 = new KllFloatsSketch();
final KllFloatsSketch sketch2 = new KllFloatsSketch();
final int n = 10000;
for (int i = 0; i < n; i++) {
sketch1.update(i);
sketch2.update(2 * n - i - 1);
}
assertEquals(sketch1.getMinValue(), 0.0f);
assertEquals(sketch1.getMaxValue(), n - 1f);
assertEquals(sketch2.getMinValue(), n);
assertEquals(sketch2.getMaxValue(), 2f * n - 1f);
sketch1.merge(sketch2);
assertFalse(sketch1.isEmpty());
assertEquals(sketch1.getN(), 2L * n);
assertEquals(sketch1.getMinValue(), 0f);
assertEquals(sketch1.getMaxValue(), 2f * n - 1);
assertEquals(sketch1.getQuantile(0.5), n, n * PMF_EPS_FOR_K_256);
}
@Test
public void mergeLowerK() {
final KllFloatsSketch sketch1 = new KllFloatsSketch(256);
final KllFloatsSketch sketch2 = new KllFloatsSketch(128);
final int n = 10000;
for (int i = 0; i < n; i++) {
sketch1.update(i);
sketch2.update(2 * n - i - 1);
}
assertEquals(sketch1.getMinValue(), 0.0f);
assertEquals(sketch1.getMaxValue(), n - 1f);
assertEquals(sketch2.getMinValue(), n);
assertEquals(sketch2.getMaxValue(), 2f * n - 1f);
assertTrue(sketch1.getNormalizedRankError(false) < sketch2.getNormalizedRankError(false));
assertTrue(sketch1.getNormalizedRankError(true) < sketch2.getNormalizedRankError(true));
sketch1.merge(sketch2);
// sketch1 must get "contaminated" by the lower K in sketch2
assertEquals(sketch1.getNormalizedRankError(false), sketch2.getNormalizedRankError(false));
assertEquals(sketch1.getNormalizedRankError(true), sketch2.getNormalizedRankError(true));
assertFalse(sketch1.isEmpty());
assertEquals(sketch1.getN(), 2 * n);
assertEquals(sketch1.getMinValue(), 0f);
assertEquals(sketch1.getMaxValue(), 2f * n - 1f);
assertEquals(sketch1.getQuantile(0.5), n, n * PMF_EPS_FOR_K_128);
}
@Test
public void mergeEmptyLowerK() {
final KllFloatsSketch sketch1 = new KllFloatsSketch(256);
final KllFloatsSketch sketch2 = new KllFloatsSketch(128);
final int n = 10000;
for (int i = 0; i < n; i++) {
sketch1.update(i);
}
// rank error should not be affected by a merge with an empty sketch with lower K
final double rankErrorBeforeMerge = sketch1.getNormalizedRankError(true);
sketch1.merge(sketch2);
assertEquals(sketch1.getNormalizedRankError(true), rankErrorBeforeMerge);
assertFalse(sketch1.isEmpty());
assertEquals(sketch1.getN(), n);
assertEquals(sketch1.getMinValue(), 0f);
assertEquals(sketch1.getMaxValue(), n - 1f);
assertEquals(sketch1.getQuantile(0.5), n / 2f, n / 2 * PMF_EPS_FOR_K_256);
//merge the other way
sketch2.merge(sketch1);
assertFalse(sketch1.isEmpty());
assertEquals(sketch1.getN(), n);
assertEquals(sketch1.getMinValue(), 0f);
assertEquals(sketch1.getMaxValue(), n - 1f);
assertEquals(sketch1.getQuantile(0.5), n / 2f, n / 2 * PMF_EPS_FOR_K_256);
}
@Test
public void mergeExactModeLowerK() {
final KllFloatsSketch sketch1 = new KllFloatsSketch(256);
final KllFloatsSketch sketch2 = new KllFloatsSketch(128);
final int n = 10000;
for (int i = 0; i < n; i++) {
sketch1.update(i);
}
sketch2.update(1);
// rank error should not be affected by a merge with a sketch in exact mode with lower K
final double rankErrorBeforeMerge = sketch1.getNormalizedRankError(true);
sketch1.merge(sketch2);
assertEquals(sketch1.getNormalizedRankError(true), rankErrorBeforeMerge);
}
@Test
public void mergeMinMinValueFromOther() {
final KllFloatsSketch sketch1 = new KllFloatsSketch();
final KllFloatsSketch sketch2 = new KllFloatsSketch();
sketch1.update(1);
sketch2.update(2);
sketch2.merge(sketch1);
assertEquals(sketch2.getMinValue(), 1.0F);
}
@Test
public void mergeMinAndMaxFromOther() {
final KllFloatsSketch sketch1 = new KllFloatsSketch();
for (int i = 0; i < 1000000; i++) {
sketch1.update(i);
}
final KllFloatsSketch sketch2 = new KllFloatsSketch();
sketch2.merge(sketch1);
assertEquals(sketch2.getMinValue(), 0F);
assertEquals(sketch2.getMaxValue(), 999999F);
}
@SuppressWarnings("unused")
@Test(expectedExceptions = SketchesArgumentException.class)
public void kTooSmall() {
new KllFloatsSketch(KllFloatsSketch.MIN_K - 1);
}
@SuppressWarnings("unused")
@Test(expectedExceptions = SketchesArgumentException.class)
public void kTooLarge() {
new KllFloatsSketch(KllFloatsSketch.MAX_K + 1);
}
@Test
public void minK() {
final KllFloatsSketch sketch = new KllFloatsSketch(KllFloatsSketch.MIN_K);
for (int i = 0; i < 1000; i++) {
sketch.update(i);
}
assertEquals(sketch.getK(), KllFloatsSketch.MIN_K);
assertEquals(sketch.getQuantile(0.5), 500, 500 * PMF_EPS_FOR_K_8);
}
@Test
public void maxK() {
final KllFloatsSketch sketch = new KllFloatsSketch(KllFloatsSketch.MAX_K);
for (int i = 0; i < 1000; i++) {
sketch.update(i);
}
assertEquals(sketch.getK(), KllFloatsSketch.MAX_K);
assertEquals(sketch.getQuantile(0.5), 500, 500 * PMF_EPS_FOR_K_256);
}
@Test
public void serializeDeserializeEmpty() {
final KllFloatsSketch sketch1 = new KllFloatsSketch();
final byte[] bytes = sketch1.toByteArray();
final KllFloatsSketch sketch2 = KllFloatsSketch.heapify(Memory.wrap(bytes));
assertEquals(bytes.length, sketch1.getSerializedSizeBytes());
assertTrue(sketch2.isEmpty());
assertEquals(sketch2.getNumRetained(), sketch1.getNumRetained());
assertEquals(sketch2.getN(), sketch1.getN());
assertEquals(sketch2.getNormalizedRankError(false), sketch1.getNormalizedRankError(false));
assertTrue(Float.isNaN(sketch2.getMinValue()));
assertTrue(Float.isNaN(sketch2.getMaxValue()));
assertEquals(sketch2.getSerializedSizeBytes(), sketch1.getSerializedSizeBytes());
}
@Test
public void serializeDeserializeOneItem() {
final KllFloatsSketch sketch1 = new KllFloatsSketch();
sketch1.update(1);
final byte[] bytes = sketch1.toByteArray();
final KllFloatsSketch sketch2 = KllFloatsSketch.heapify(Memory.wrap(bytes));
assertEquals(bytes.length, sketch1.getSerializedSizeBytes());
assertFalse(sketch2.isEmpty());
assertEquals(sketch2.getNumRetained(), 1);
assertEquals(sketch2.getN(), 1);
assertEquals(sketch2.getNormalizedRankError(false), sketch1.getNormalizedRankError(false));
assertFalse(Float.isNaN(sketch2.getMinValue()));
assertFalse(Float.isNaN(sketch2.getMaxValue()));
assertEquals(sketch2.getSerializedSizeBytes(), 12);
}
@Test
public void deserializeOneItemV1() throws Exception {
final byte[] bytes = getResourceBytes("kll_sketch_float_one_item_v1.sk");
final KllFloatsSketch sketch = KllFloatsSketch.heapify(Memory.wrap(bytes));
assertFalse(sketch.isEmpty());
assertFalse(sketch.isEstimationMode());
assertEquals(sketch.getN(), 1);
assertEquals(sketch.getNumRetained(), 1);
}
@Test
public void serializeDeserialize() {
final KllFloatsSketch sketch1 = new KllFloatsSketch();
final int n = 1000;
for (int i = 0; i < n; i++) {
sketch1.update(i);
}
final byte[] bytes = sketch1.toByteArray();
final KllFloatsSketch sketch2 = KllFloatsSketch.heapify(Memory.wrap(bytes));
assertEquals(bytes.length, sketch1.getSerializedSizeBytes());
assertFalse(sketch2.isEmpty());
assertEquals(sketch2.getNumRetained(), sketch1.getNumRetained());
assertEquals(sketch2.getN(), sketch1.getN());
assertEquals(sketch2.getNormalizedRankError(false), sketch1.getNormalizedRankError(false));
assertEquals(sketch2.getMinValue(), sketch1.getMinValue());
assertEquals(sketch2.getMaxValue(), sketch1.getMaxValue());
assertEquals(sketch2.getSerializedSizeBytes(), sketch1.getSerializedSizeBytes());
}
@Test(expectedExceptions = SketchesArgumentException.class)
public void outOfOrderSplitPoints() {
final KllFloatsSketch sketch = new KllFloatsSketch();
sketch.update(0);
sketch.getCDF(new float[] {1, 0});
}
@Test(expectedExceptions = SketchesArgumentException.class)
public void nanSplitPoint() {
final KllFloatsSketch sketch = new KllFloatsSketch();
sketch.update(0);
sketch.getCDF(new float[] {Float.NaN});
}
@Test
public void getMaxSerializedSizeBytes() {
final int sizeBytes =
KllFloatsSketch.getMaxSerializedSizeBytes(KllFloatsSketch.DEFAULT_K, 1_000_000_000);
assertEquals(sizeBytes, 3160);
}
@Test
public void checkUbOnNumLevels() {
assertEquals(KllHelper.ubOnNumLevels(0), 1);
}
@Test
public void checkIntCapAux() {
int lvlCap = KllHelper.levelCapacity(10, 61, 0, 8);
assertEquals(lvlCap, 8);
lvlCap = KllHelper.levelCapacity(10, 61, 60, 8);
assertEquals(lvlCap, 10);
}
@Test
public void checkSuperLargeKandLevels() {
//This is beyond what the sketch can be configured for.
final int size = KllHelper.computeTotalCapacity(1 << 29, 8, 61);
assertEquals(size, 1_610_612_846);
}
@Test
public void getQuantiles() {
final KllFloatsSketch sketch = new KllFloatsSketch();
sketch.update(1);
sketch.update(2);
sketch.update(3);
final float[] quantiles1 = sketch.getQuantiles(new double[] {0, 0.5, 1});
final float[] quantiles2 = sketch.getQuantiles(3);
assertEquals(quantiles1, quantiles2);
assertEquals(quantiles1[0], 1f);
assertEquals(quantiles1[1], 2f);
assertEquals(quantiles1[2], 3f);
}
}