blob: 21d6ce988e88af33726e6fe5a7db8316aa006344 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.datasketches.quantiles;
import static org.apache.datasketches.quantilescommon.LongsAsOrderableStrings.getString;
import static org.apache.datasketches.quantilescommon.QuantileSearchCriteria.EXCLUSIVE;
import static org.apache.datasketches.quantilescommon.QuantileSearchCriteria.INCLUSIVE;
import static org.testng.Assert.assertEquals;
import static org.testng.Assert.assertFalse;
import static org.testng.Assert.assertNotNull;
import static org.testng.Assert.assertTrue;
import static org.testng.Assert.fail;
import java.util.Arrays;
import java.util.Comparator;
import java.util.Random;
import org.apache.datasketches.common.ArrayOfBooleansSerDe;
import org.apache.datasketches.common.ArrayOfDoublesSerDe;
import org.apache.datasketches.common.ArrayOfItemsSerDe;
import org.apache.datasketches.common.ArrayOfLongsSerDe;
import org.apache.datasketches.common.ArrayOfStringsSerDe;
import org.apache.datasketches.common.SketchesArgumentException;
import org.apache.datasketches.memory.Memory;
import org.apache.datasketches.memory.WritableMemory;
import org.apache.datasketches.quantilescommon.GenericSortedView;
import org.apache.datasketches.quantilescommon.GenericSortedViewIterator;
import org.apache.datasketches.quantilescommon.ItemsSketchSortedView;
import org.apache.datasketches.quantilescommon.QuantilesGenericSketchIterator;
import org.testng.annotations.BeforeMethod;
import org.testng.annotations.Test;
public class ItemsSketchTest {
@BeforeMethod
public void setUp() {
ItemsSketch.rand.setSeed(32749); // make sketches deterministic for testing
}
@Test
public void empty() {
final ItemsSketch<String> sketch = ItemsSketch.getInstance(String.class, 128, Comparator.naturalOrder());
assertNotNull(sketch);
sketch.update(null);
assertTrue(sketch.isEmpty());
assertEquals(sketch.getN(), 0);
assertEquals(sketch.getNumRetained(), 0);
try { sketch.getMinItem(); fail(); } catch (IllegalArgumentException e) {}
try { sketch.getMaxItem(); fail(); } catch (IllegalArgumentException e) {}
try { sketch.getQuantile(0.5); fail(); } catch (IllegalArgumentException e) {}
try { sketch.getQuantiles(new double[] {0.0, 1.0}); fail(); } catch (IllegalArgumentException e) {}
final byte[] byteArr = sketch.toByteArray(new ArrayOfStringsSerDe());
assertEquals(byteArr.length, 8);
try { sketch.getPMF(new String[0]); fail(); } catch (IllegalArgumentException e) {}
try { sketch.getCDF(new String[0]); fail(); } catch (IllegalArgumentException e) {}
try { sketch.getRank("a"); fail(); } catch (IllegalArgumentException e) {}
}
@Test
public void oneItem() {
final ItemsSketch<String> sketch = ItemsSketch.getInstance(String.class, 128, Comparator.naturalOrder());
sketch.update("a");
assertEquals(sketch.getN(), 1);
assertEquals(sketch.getNumRetained(), 1);
assertEquals(sketch.getMinItem(), "a");
assertEquals(sketch.getMaxItem(), "a");
assertEquals(sketch.getQuantile(0.5, EXCLUSIVE), "a");
assertEquals(sketch.getRank("a", EXCLUSIVE), 0.0);
{
final double[] pmf = sketch.getPMF(new String[0], EXCLUSIVE);
assertEquals(pmf.length, 1);
assertEquals(pmf[0], 1.0);
}
{
final double[] pmf = sketch.getPMF(new String[] {"a"}, EXCLUSIVE);
assertEquals(pmf.length, 2);
assertEquals(pmf[0], 0.0);
assertEquals(pmf[1], 1.0);
}
{
final double[] cdf = sketch.getCDF(new String[0], EXCLUSIVE);
assertEquals(cdf.length, 1);
assertEquals(cdf[0], 1.0);
}
{
final double[] cdf = sketch.getCDF(new String[] {"a"}, EXCLUSIVE);
assertEquals(cdf.length, 2);
assertEquals(cdf[0], 0.0);
assertEquals(cdf[1], 1.0);
}
sketch.reset();
assertTrue(sketch.isEmpty());
assertEquals(sketch.getN(), 0);
assertEquals(sketch.getNumRetained(), 0);
try { sketch.getMinItem(); fail(); } catch (IllegalArgumentException e) {}
try { sketch.getMaxItem(); fail(); } catch (IllegalArgumentException e) {}
try { sketch.getQuantile(0.5); fail(); } catch (IllegalArgumentException e) {}
}
@Test
public void tenItems() {
final ItemsSketch<Integer> sketch = ItemsSketch.getInstance(Integer.class, 128, Comparator.naturalOrder());
for (int i = 1; i <= 10; i++) { sketch.update(i); }
assertFalse(sketch.isEmpty());
assertFalse(sketch.hasMemory());
assertFalse(sketch.isReadOnly());
assertEquals(sketch.getN(), 10);
assertEquals(sketch.getNumRetained(), 10);
for (int i = 1; i <= 10; i++) {
assertEquals(sketch.getRank(i), (i) / 10.0);
assertEquals(sketch.getRank(i, EXCLUSIVE), (i - 1) / 10.0);
assertEquals(sketch.getRank(i, INCLUSIVE), i / 10.0);
}
final Integer[] qArr = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
double[] rOut = sketch.getRanks(qArr); //inclusive
for (int i = 0; i < qArr.length; i++) {
assertEquals(rOut[i], (i + 1) / 10.0);
}
rOut = sketch.getRanks(qArr, EXCLUSIVE); //exclusive
for (int i = 0; i < qArr.length; i++) {
assertEquals(rOut[i], i / 10.0);
}
// inclusive = (default)
assertEquals(sketch.getQuantile(0, EXCLUSIVE), 1);
assertEquals(sketch.getQuantile(0.1, EXCLUSIVE), 2);
assertEquals(sketch.getQuantile(0.2, EXCLUSIVE), 3);
assertEquals(sketch.getQuantile(0.3, EXCLUSIVE), 4);
assertEquals(sketch.getQuantile(0.4, EXCLUSIVE), 5);
assertEquals(sketch.getQuantile(0.5, EXCLUSIVE), 6);
assertEquals(sketch.getQuantile(0.6, EXCLUSIVE), 7);
assertEquals(sketch.getQuantile(0.7, EXCLUSIVE), 8);
assertEquals(sketch.getQuantile(0.8, EXCLUSIVE), 9);
assertEquals(sketch.getQuantile(0.9, EXCLUSIVE), 10);
assertEquals(sketch.getQuantile(1, EXCLUSIVE), 10);
// inclusive = true
assertEquals(sketch.getQuantile(0, INCLUSIVE), 1);
assertEquals(sketch.getQuantile(0.1, INCLUSIVE), 1);
assertEquals(sketch.getQuantile(0.2, INCLUSIVE), 2);
assertEquals(sketch.getQuantile(0.3, INCLUSIVE), 3);
assertEquals(sketch.getQuantile(0.4, INCLUSIVE), 4);
assertEquals(sketch.getQuantile(0.5, INCLUSIVE), 5);
assertEquals(sketch.getQuantile(0.6, INCLUSIVE), 6);
assertEquals(sketch.getQuantile(0.7, INCLUSIVE), 7);
assertEquals(sketch.getQuantile(0.8, INCLUSIVE), 8);
assertEquals(sketch.getQuantile(0.9, INCLUSIVE), 9);
assertEquals(sketch.getQuantile(1, INCLUSIVE), 10);
// getQuantile() and getQuantiles() equivalence
{
// inclusive = (default)
final Integer[] quantiles =
sketch.getQuantiles(new double[] {0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1});
for (int i = 0; i <= 10; i++) {
assertEquals(sketch.getQuantile(i / 10.0), quantiles[i]);
}
}
{
// inclusive = true
final Integer[] quantiles =
sketch.getQuantiles(new double[] {0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1}, INCLUSIVE);
for (int i = 0; i <= 10; i++) {
assertEquals(sketch.getQuantile(i / 10.0, INCLUSIVE), quantiles[i]);
}
}
}
@Test
public void estimation() {
final ItemsSketch<Integer> sketch = ItemsSketch.getInstance(Integer.class, 128, Comparator.naturalOrder());
for (int i = 1; i <= 1000; i++) {
sketch.update(i);
}
assertEquals(sketch.getN(), 1000);
assertTrue(sketch.getNumRetained() < 1000);
assertEquals(sketch.getMinItem(), Integer.valueOf(1));
assertEquals(sketch.getMaxItem(), Integer.valueOf(1000));
// based on ~1.7% normalized rank error for this particular case
assertEquals(sketch.getQuantile(0.5), Integer.valueOf(500), 17);
final double[] normRanks = {0.0, 0.5, 1.0};
Integer[] quantiles = sketch.getQuantiles(normRanks);
assertEquals(quantiles[1], Integer.valueOf(500), 17); // median
final double[] normRanks2 = {.25, 0.5, 0.75};
final Integer[] quantiles2 = sketch.getQuantiles(normRanks2);
assertEquals(quantiles2[0], Integer.valueOf(250), 17);
assertEquals(quantiles2[1], Integer.valueOf(500), 17);
assertEquals(quantiles2[2], Integer.valueOf(750), 17);
final double normErr = sketch.getNormalizedRankError(true);
assertEquals(normErr, .0172, .001);
println(""+normErr);
{
final double[] pmf = sketch.getPMF(new Integer[0]);
assertEquals(pmf.length, 1);
assertEquals(pmf[0], 1.0);
}
{
final double[] pmf = sketch.getPMF(new Integer[] {500});
assertEquals(pmf.length, 2);
assertEquals(pmf[0], 0.5, 0.05);
assertEquals(pmf[1], 0.5, 0.05);
}
{
final Integer[] intArr = new Integer[50];
for (int i= 0; i<50; i++) {
intArr[i] = 20*i +10;
}
final double[] pmf = sketch.getPMF(intArr);
assertEquals(pmf.length, 51);
}
{
final double[] cdf = sketch.getCDF(new Integer[0]);
assertEquals(cdf.length, 1);
assertEquals(cdf[0], 1.0);
}
{
final double[] cdf = sketch.getCDF(new Integer[] {500});
assertEquals(cdf.length, 2);
assertEquals(cdf[0], 0.5, 0.05);
assertEquals(cdf[1], 1.0, 0.05);
}
assertEquals(sketch.getRank(500), 0.5, 0.01);
}
@Test
public void serializeDeserializeLong() {
final ItemsSketch<Long> sketch1 = ItemsSketch.getInstance(Long.class, 128, Comparator.naturalOrder());
for (int i = 1; i <= 500; i++) {
sketch1.update((long) i);
}
final ArrayOfItemsSerDe<Long> serDe = new ArrayOfLongsSerDe();
final byte[] bytes = sketch1.toByteArray(serDe);
final ItemsSketch<Long> sketch2 =
ItemsSketch.getInstance(Long.class, Memory.wrap(bytes), Comparator.naturalOrder(), serDe);
for (int i = 501; i <= 1000; i++) {
sketch2.update((long) i);
}
assertEquals(sketch2.getN(), 1000);
assertTrue(sketch2.getNumRetained() < 1000);
assertEquals(sketch2.getMinItem(), Long.valueOf(1));
assertEquals(sketch2.getMaxItem(), Long.valueOf(1000));
// based on ~1.7% normalized rank error for this particular case
assertEquals(sketch2.getQuantile(0.5), Long.valueOf(500), 17);
}
@Test
public void serializeDeserializeDouble() {
final ItemsSketch<Double> sketch1 = ItemsSketch.getInstance(Double.class, 128, Comparator.naturalOrder());
for (int i = 1; i <= 500; i++) {
sketch1.update((double) i);
}
final ArrayOfItemsSerDe<Double> serDe = new ArrayOfDoublesSerDe();
final byte[] bytes = sketch1.toByteArray(serDe);
final ItemsSketch<Double> sketch2 =
ItemsSketch.getInstance(Double.class, Memory.wrap(bytes), Comparator.naturalOrder(), serDe);
for (int i = 501; i <= 1000; i++) {
sketch2.update((double) i);
}
assertEquals(sketch2.getN(), 1000);
assertTrue(sketch2.getNumRetained() < 1000);
assertEquals(sketch2.getMinItem(), Double.valueOf(1));
assertEquals(sketch2.getMaxItem(), Double.valueOf(1000));
// based on ~1.7% normalized rank error for this particular case
assertEquals(sketch2.getQuantile(0.5), 500, 17);
}
@Test
public void serializeDeserializeString() {
// numeric order to be able to make meaningful assertions
final Comparator<String> numericOrder = new Comparator<String>() {
@Override
public int compare(final String s1, final String s2) {
final Integer i1 = Integer.parseInt(s1, 2);
final Integer i2 = Integer.parseInt(s2, 2);
return i1.compareTo(i2);
}
};
final ItemsSketch<String> sketch1 = ItemsSketch.getInstance(String.class, 128, numericOrder);
for (int i = 1; i <= 500; i++)
{
sketch1.update(Integer.toBinaryString(i << 10)); // to make strings longer
}
final ArrayOfItemsSerDe<String> serDe = new ArrayOfStringsSerDe();
final byte[] bytes = sketch1.toByteArray(serDe);
final ItemsSketch<String> sketch2 = ItemsSketch.getInstance(String.class, Memory.wrap(bytes), numericOrder, serDe);
for (int i = 501; i <= 1000; i++) {
sketch2.update(Integer.toBinaryString(i << 10));
}
assertEquals(sketch2.getN(), 1000);
assertTrue(sketch2.getNumRetained() < 1000);
assertEquals(sketch2.getMinItem(), Integer.toBinaryString(1 << 10));
assertEquals(sketch2.getMaxItem(), Integer.toBinaryString(1000 << 10));
// based on ~1.7% normalized rank error for this particular case
assertEquals(Integer.parseInt(sketch2.getQuantile(0.5), 2) >> 10, Integer.valueOf(500), 17);
}
@Test
public void toStringCrudeCheck() {
final ItemsSketch<String> sketch = ItemsSketch.getInstance(String.class, Comparator.naturalOrder());
String brief, full, part;
brief = sketch.toString();
full = sketch.toString(true, true);
part = sketch.toString(false, true);
sketch.update("a");
brief = sketch.toString();
full = sketch.toString(true, true);
part = sketch.toString(false, true);
//println(full);
assertTrue(brief.length() < full.length());
assertTrue(part.length() < full.length());
final ArrayOfItemsSerDe<String> serDe = new ArrayOfStringsSerDe();
final byte[] bytes = sketch.toByteArray(serDe);
ItemsSketch.toString(bytes);
ItemsSketch.toString(Memory.wrap(bytes));
//PreambleUtil.toString(bytes, true); // not a DoublesSketch so this will fail
//ItemsSketch<String> sketch2 = ItemsSketch.getInstance(Memory.wrap(bytes), Comparator.naturalOrder(), serDe);
}
@Test
public void toStringBiggerCheck() {
final ItemsSketch<String> sketch = ItemsSketch.getInstance(String.class, 16, Comparator.naturalOrder());
for (int i=0; i<40; i++) {
sketch.update(Integer.toString(i));
}
final String bigger = sketch.toString();
final String full = sketch.toString(true, true);
//println(full);
assertTrue(bigger.length() < full.length());
}
@Test(expectedExceptions = SketchesArgumentException.class)
public void checkDownsampleException() {
final ItemsSketch<String> sketch = ItemsSketch.getInstance(String.class, 16, Comparator.naturalOrder());
for (int i=0; i<40; i++) {
sketch.update(Integer.toString(i));
}
sketch.downSample(32);
}
@Test(expectedExceptions = SketchesArgumentException.class)
public void negativeQuantileMustThrow() {
final ItemsSketch<String> sketch = ItemsSketch.getInstance(String.class, 16, Comparator.naturalOrder());
sketch.update("ABC");
sketch.getQuantile(-0.1);
}
@Test(expectedExceptions = SketchesArgumentException.class)
public void checkGetInstanceExcep1() {
final Memory mem = Memory.wrap(new byte[4]);
ItemsSketch.getInstance(String.class, mem, Comparator.naturalOrder(), new ArrayOfStringsSerDe());
}
@Test(expectedExceptions = SketchesArgumentException.class)
public void checkGetInstanceExcep2() {
final Memory mem = Memory.wrap(new byte[8]);
ItemsSketch.getInstance(String.class, mem, Comparator.naturalOrder(), new ArrayOfStringsSerDe());
}
@Test
public void checkGoodSerDeId() {
final ItemsSketch<String> sketch = ItemsSketch.getInstance(String.class, Comparator.naturalOrder());
final byte[] byteArr = sketch.toByteArray(new ArrayOfStringsSerDe());
final Memory mem = Memory.wrap(byteArr);
//println(PreambleUtil.toString(mem));
ItemsSketch.getInstance(String.class, mem, Comparator.naturalOrder(), new ArrayOfStringsSerDe());
}
@Test
public void checkDownsample() {
final ItemsSketch<String> sketch = ItemsSketch.getInstance(String.class, 16, Comparator.naturalOrder());
for (int i=0; i<40; i++) {
sketch.update(Integer.toString(i));
}
final ItemsSketch<String> out = sketch.downSample(8);
assertEquals(out.getK(), 8);
}
@Test(expectedExceptions = SketchesArgumentException.class)
public void unorderedSplitPoints() {
final ItemsSketch<Integer> sketch = ItemsSketch.getInstance(Integer.class, Comparator.naturalOrder());
sketch.update(1);
sketch.getPMF(new Integer[] {2, 1});
}
@Test(expectedExceptions = SketchesArgumentException.class)
public void nonUniqueSplitPoints() {
final ItemsSketch<Integer> sketch = ItemsSketch.getInstance(Integer.class, Comparator.naturalOrder());
sketch.update(1);
sketch.getPMF(new Integer[] {1, 1});
}
@Test(expectedExceptions = SketchesArgumentException.class)
public void nullInSplitPoints() {
final ItemsSketch<Integer> sketch = ItemsSketch.getInstance(Integer.class, Comparator.naturalOrder());
sketch.update(1);
sketch.getPMF(new Integer[] {1, null});
}
@Test(expectedExceptions = SketchesArgumentException.class)
public void compactNotSupported() {
final ArrayOfDoublesSerDe serDe = new ArrayOfDoublesSerDe();
final ItemsSketch<Double> sketch = ItemsSketch.getInstance(Double.class, Comparator.naturalOrder());
final byte[] byteArr = sketch.toByteArray(serDe);
final WritableMemory mem = WritableMemory.writableWrap(byteArr);
mem.clearBits(PreambleUtil.FLAGS_BYTE, (byte) PreambleUtil.COMPACT_FLAG_MASK);
println(PreambleUtil.toString(mem, false));
ItemsSketch.getInstance(Double.class, mem, Comparator.naturalOrder(), serDe);
}
@Test
public void checkPutMemory() {
final ItemsSketch<String> sketch = ItemsSketch.getInstance(String.class, 16, Comparator.naturalOrder());
for (int i=0; i<40; i++) {
sketch.update(Integer.toString(i));
}
final byte[] byteArr = new byte[200];
final WritableMemory mem = WritableMemory.writableWrap(byteArr);
sketch.putMemory(mem, new ArrayOfStringsSerDe());
}
@Test(expectedExceptions = SketchesArgumentException.class)
public void checkPutMemoryException() {
final ItemsSketch<String> sketch = ItemsSketch.getInstance(String.class, 16, Comparator.naturalOrder());
for (int i=0; i<40; i++) {
sketch.update(Integer.toString(i));
}
final byte[] byteArr = new byte[100];
final WritableMemory mem = WritableMemory.writableWrap(byteArr);
sketch.putMemory(mem, new ArrayOfStringsSerDe());
}
@Test
public void checkPMFonEmpty() {
final ItemsSketch<String> iss = buildStringIS(32, 32);
final double[] ranks = new double[0];
final String[] qOut = iss.getQuantiles(ranks);
println("qOut: "+qOut.length);
assertEquals(qOut.length, 0);
final double[] cdfOut = iss.getCDF(new String[0]);
println("cdfOut: "+cdfOut.length);
assertEquals(cdfOut[0], 1.0, 0.0);
}
@Test
public void checkToFromByteArray() {
checkToFromByteArray2(128, 1300); //generates a pattern of 5 -> 101
checkToFromByteArray2(4, 7);
checkToFromByteArray2(4, 8);
checkToFromByteArray2(4, 9);
}
@Test
public void getRankAndGetCdfConsistency() {
final ItemsSketch<Integer> sketch = ItemsSketch.getInstance(Integer.class, Comparator.naturalOrder());
final int n = 1_000_000;
final Integer[] values = new Integer[n];
for (int i = 0; i < n; i++) {
sketch.update(i);
values[i] = i;
}
{ // inclusive = false (default)
final double[] ranks = sketch.getCDF(values);
for (int i = 0; i < n; i++) {
assertEquals(ranks[i], sketch.getRank(values[i]), 0.00001, "CDF vs rank for value " + i);
}
}
{ // inclusive = true
final double[] ranks = sketch.getCDF(values, INCLUSIVE);
for (int i = 0; i < n; i++) {
assertEquals(ranks[i], sketch.getRank(values[i], INCLUSIVE), 0.00001, "CDF vs rank for value " + i);
}
}
}
@Test
public void getRankAndGetCdfConsistencyReverseComparator() {
final ItemsSketch<Integer> sketch =
ItemsSketch.getInstance(Integer.class, Comparator.<Integer>naturalOrder().reversed());
final int n = 1_000_000;
final Integer[] values = new Integer[n];
for (int i = 0; i < n; i++) {
sketch.update(i);
values[i] = i;
}
Arrays.sort(values, sketch.getComparator());
final double[] ranks = sketch.getCDF(values);
for (int i = 0; i < n; i++) {
assertEquals(ranks[i], sketch.getRank(values[i]), 0.00001, "CDF vs rank for value " + i);
}
}
@Test
public void checkBounds() {
final ItemsSketch<Double> sketch = ItemsSketch.getInstance(Double.class, Comparator.naturalOrder());
for (int i = 0; i < 1000; i++) {
sketch.update((double)i);
}
final double eps = sketch.getNormalizedRankError(false);
final double est = sketch.getQuantile(0.5);
final double ub = sketch.getQuantileUpperBound(0.5);
final double lb = sketch.getQuantileLowerBound(0.5);
assertEquals(ub, (double)sketch.getQuantile(.5 + eps));
assertEquals(lb, (double)sketch.getQuantile(0.5 - eps));
println("Ext : " + est);
println("UB : " + ub);
println("LB : " + lb);
}
@Test
public void checkGetKFromEqs() {
final ItemsSketch<Double> sketch = ItemsSketch.getInstance(Double.class, Comparator.naturalOrder());
final int k = sketch.getK();
final double eps = ItemsSketch.getNormalizedRankError(k, false);
final double epsPmf = ItemsSketch.getNormalizedRankError(k, true);
final int kEps = ItemsSketch.getKFromEpsilon(eps, false);
final int kEpsPmf = ItemsSketch.getKFromEpsilon(epsPmf, true);
assertEquals(kEps, k);
assertEquals(kEpsPmf, k);
}
private static void checkToFromByteArray2(final int k, final int n) {
final ItemsSketch<String> is = buildStringIS(k, n);
byte[] byteArr;
Memory mem;
ItemsSketch<String> is2;
final ArrayOfStringsSerDe serDe = new ArrayOfStringsSerDe();
//ordered
byteArr = is.toByteArray(true, serDe);
mem = Memory.wrap(byteArr);
is2 = ItemsSketch.getInstance(String.class, mem, Comparator.naturalOrder(), serDe);
for (double f = 0.1; f < 0.95; f += 0.1) {
assertEquals(is.getQuantile(f), is2.getQuantile(f));
}
//Not-ordered
byteArr = is.toByteArray(false, serDe);
mem = Memory.wrap(byteArr);
is2 = ItemsSketch.getInstance(String.class, mem, Comparator.naturalOrder(), serDe);
for (double f = 0.1; f < 0.95; f += 0.1) {
assertEquals(is.getQuantile(f), is2.getQuantile(f));
}
}
static ItemsSketch<String> buildStringIS(final int k, final int n) {
return buildStringIS(k, n, 0);
}
static ItemsSketch<String> buildStringIS(final int k, final int n, final int start) {
final ItemsSketch<String> sketch = ItemsSketch.getInstance(String.class, k, Comparator.naturalOrder());
for (int i = 0; i < n; i++) {
sketch.update(Integer.toString(i + start));
}
return sketch;
}
@Test
public void sortedView() {
final ItemsSketch<Integer> sketch = ItemsSketch.getInstance(Integer.class, Comparator.naturalOrder());
sketch.update(3);
sketch.update(1);
sketch.update(2);
{ // cumulative inclusive
final GenericSortedView<Integer> view = sketch.getSortedView();
final GenericSortedViewIterator<Integer> it = view.iterator();
assertEquals(it.next(), true);
assertEquals(it.getQuantile(), 1);
assertEquals(it.getWeight(), 1);
assertEquals(it.getNaturalRank(INCLUSIVE), 1);
assertEquals(it.next(), true);
assertEquals(it.getQuantile(), 2);
assertEquals(it.getWeight(), 1);
assertEquals(it.getNaturalRank(INCLUSIVE), 2);
assertEquals(it.next(), true);
assertEquals(it.getQuantile(), 3);
assertEquals(it.getWeight(), 1);
assertEquals(it.getNaturalRank(INCLUSIVE), 3);
assertEquals(it.next(), false);
}
}
@Test
public void checkIssue484() {
Boolean[] items = { true,false,true,false,true,false,true,false,true,false };
ItemsSketch<Boolean> sketch = ItemsSketch.getInstance(Boolean.class, Boolean::compareTo);
for (int i = 0; i < items.length; i++) { sketch.update(items[i]); }
byte[] serialized = sketch.toByteArray(new ArrayOfBooleansSerDe());
ItemsSketch<Boolean> deserialized =
ItemsSketch.getInstance(Boolean.class, Memory.wrap(serialized), Boolean::compareTo, new ArrayOfBooleansSerDe());
checkSketchesEqual(sketch, deserialized);
}
private static <T> void checkSketchesEqual(ItemsSketch<T> expected, ItemsSketch<T> actual) {
ItemsSketchSortedView<T> expSV = expected.getSortedView();
ItemsSketchSortedView<T> actSV = actual.getSortedView();
int N = (int)actSV.getN();
long[] expCumWts = expSV.getCumulativeWeights();
Boolean[] expItemsArr = (Boolean[])expSV.getQuantiles();
long[] actCumWts = actSV.getCumulativeWeights();
Boolean[] actItemsArr = (Boolean[])actSV.getQuantiles();
printf("%3s %8s %8s\n", "i","Actual", "Expected");
for (int i = 0; i < N; i++) {
printf("%3d %8s %8s\n", i, actItemsArr[i].toString(), expItemsArr[i].toString());
}
assertEquals(actCumWts, expCumWts);
assertEquals(actItemsArr, expItemsArr);
}
@Test
//There is no guarantee that BaseBuffer is sorted after a merge.
//The issue is, during a merge, BB must be sorted prior to a compaction to a higher level.
//Otherwise the higher levels would not be sorted properly.
public void checkL0SortDuringMergeIssue527() throws NumberFormatException {
final Random rand = new Random();
final ItemsSketch<String> sk1 = ItemsSketch.getInstance(String.class, 8, Comparator.reverseOrder());
final ItemsSketch<String> sk2 = ItemsSketch.getInstance(String.class, 8, Comparator.reverseOrder());
final int n = 24; //don't change this
for (int i = 1; i <= n; i++ ) {
final int j = rand.nextInt(n) + 1;
sk1.update(getString(j, 3));
sk2.update(getString(j +100, 3));
}
int k = 8;
ItemsUnion<String> union = ItemsUnion.getInstance(String.class, k, Comparator.reverseOrder());
union.union(sk1);
union.union(sk2);
ItemsSketch<String> sk3 = union.getResult();
println(sk3.toString(true, true)); //L0 and above should be sorted in reverse. Ignore BB.
final QuantilesGenericSketchIterator<String> itr = sk3.iterator();
itr.next();
int prev = Integer.parseInt(itr.getQuantile().trim());
for (int i = 1; i < (2 * k); i++) {
if (itr.next()) {
int v = Integer.parseInt(itr.getQuantile().trim());
if (i == k) {
prev = Integer.parseInt(itr.getQuantile().trim());
continue;
}
assertTrue(v <= prev);
prev = v;
}
}
}
@Test
public void printlnTest() {
println("PRINTING: "+this.getClass().getName());
}
private final static boolean enablePrinting = false;
/**
* @param format the format
* @param args the args
*/
private static final void printf(final String format, final Object ...args) {
if (enablePrinting) { System.out.printf(format, args); }
}
/**
* @param o the Object to println
*/
private static final void println(final Object o) {
if (enablePrinting) { System.out.println(o.toString()); }
}
}