blob: 35d73fce31f756c0e97c18a63506fbedb6443b5c [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.datasketches.kll;
import static org.apache.datasketches.kll.KllSketch.SketchType.ITEMS_SKETCH;
import static org.testng.Assert.assertEquals;
import static org.testng.Assert.assertFalse;
import static org.testng.Assert.assertTrue;
import static org.testng.Assert.fail;
import java.util.Comparator;
import org.apache.datasketches.common.ArrayOfStringsSerDe;
import org.apache.datasketches.common.SketchesArgumentException;
import org.apache.datasketches.common.Util;
import org.apache.datasketches.kll.KllItemsSketchSortedView.KllItemsSketchSortedViewIterator;
import org.apache.datasketches.memory.Memory;
import org.apache.datasketches.memory.WritableMemory;
import org.testng.annotations.Test;
public class KllMiscItemsTest {
static final String LS = System.getProperty("line.separator");
public ArrayOfStringsSerDe serDe = new ArrayOfStringsSerDe();
@Test
public void checkSortedViewConstruction() {
final KllItemsSketch<String> sk = KllItemsSketch.newHeapInstance(20, Comparator.naturalOrder(), serDe);
final int n = 20;
final int digits = Util.numDigits(n);
for (int i = 1; i <= n; i++) {
sk.update(Util.intToFixedLengthString(i, digits));
}
KllItemsSketchSortedView<String> sv = sk.getSortedView();
long[] cumWeights = sv.getCumulativeWeights();
String[] values = sv.getQuantiles();
assertEquals(cumWeights.length, 20);
assertEquals(values.length, 20);
for (int i = 0; i < 20; i++) {
assertEquals(cumWeights[i], i + 1);
assertEquals(values[i], Util.intToFixedLengthString(i + 1, digits));
}
}
@Test //set static enablePrinting = true for visual checking
public void checkBounds() {
final KllItemsSketch<String> sk = KllItemsSketch.newHeapInstance(Comparator.naturalOrder(), serDe);
final int n = 1000;
final int digits = Util.numDigits(n);
for (int i = 1; i <= n; i++) {
sk.update(Util.intToFixedLengthString(i, digits));
}
final double eps = sk.getNormalizedRankError(false);
final String est = sk.getQuantile(0.5);
final String ub = sk.getQuantileUpperBound(0.5);
final String lb = sk.getQuantileLowerBound(0.5);
assertEquals(ub, sk.getQuantile(.5 + eps));
assertEquals(lb, sk.getQuantile(0.5 - eps));
println("Ext : " + est);
println("UB : " + ub);
println("LB : " + lb);
final double rest = sk.getRank(est);
final double restUB = sk.getRankUpperBound(rest);
final double restLB = sk.getRankLowerBound(rest);
assertTrue(restUB - rest < (2 * eps));
assertTrue(rest - restLB < (2 * eps));
}
@Test(expectedExceptions = SketchesArgumentException.class)
public void checkHeapifyExceptions1() {
KllItemsSketch<String> sk = KllItemsSketch.newHeapInstance(Comparator.naturalOrder(), serDe);
WritableMemory wmem = WritableMemory.writableWrap(sk.toByteArray());
wmem.putByte(6, (byte)3); //corrupt with odd M
KllItemsSketch.heapify(wmem, Comparator.naturalOrder(), serDe);
}
@Test(expectedExceptions = SketchesArgumentException.class)
public void checkHeapifyExceptions2() {
KllItemsSketch<String> sk = KllItemsSketch.newHeapInstance(Comparator.naturalOrder(), serDe);
WritableMemory wmem = WritableMemory.writableWrap(sk.toByteArray());
wmem.putByte(0, (byte)1); //corrupt preamble ints, should be 2
KllItemsSketch.heapify(wmem, Comparator.naturalOrder(), serDe);
}
@Test(expectedExceptions = SketchesArgumentException.class)
public void checkHeapifyExceptions3() {
KllItemsSketch<String> sk = KllItemsSketch.newHeapInstance(Comparator.naturalOrder(), serDe);
sk.update("1");
sk.update("2");
WritableMemory wmem = WritableMemory.writableWrap(sk.toByteArray());
wmem.putByte(0, (byte)1); //corrupt preamble ints, should be 5
KllItemsSketch.heapify(wmem, Comparator.naturalOrder(), serDe);
}
@Test(expectedExceptions = SketchesArgumentException.class)
public void checkHeapifyExceptions4() {
KllItemsSketch<String> sk = KllItemsSketch.newHeapInstance(Comparator.naturalOrder(), serDe);
WritableMemory wmem = WritableMemory.writableWrap(sk.toByteArray());
wmem.putByte(1, (byte)0); //corrupt SerVer, should be 1 or 2
KllItemsSketch.heapify(wmem, Comparator.naturalOrder(), serDe);
}
@Test(expectedExceptions = SketchesArgumentException.class)
public void checkHeapifyExceptions5() {
KllItemsSketch<String> sk = KllItemsSketch.newHeapInstance(Comparator.naturalOrder(), serDe);
WritableMemory wmem = WritableMemory.writableWrap(sk.toByteArray());
wmem.putByte(2, (byte)0); //corrupt FamilyID, should be 15
KllItemsSketch.heapify(wmem, Comparator.naturalOrder(), serDe);
}
@Test //set static enablePrinting = true for visual checking
public void checkMisc() {
KllItemsSketch<String> sk = KllItemsSketch.newHeapInstance(20, Comparator.naturalOrder(), serDe);
try { sk.getMaxItem(); fail(); } catch (SketchesArgumentException e) {} //empty
println(sk.toString(true, true));
final int n = 21;
final int digits = Util.numDigits(n);
for (int i = 1; i <= n; i++) {
sk.update(Util.intToFixedLengthString(i, digits));
}
println(sk.toString(true, true));
sk.toByteArray();
final String[] items = sk.getTotalItemsArray();
assertEquals(items.length, 33);
final int[] levels = sk.getLevelsArray(sk.sketchStructure);
assertEquals(levels.length, 3);
assertEquals(sk.getNumLevels(), 2);
}
//@Test //set static enablePrinting = true for visual checking
public void visualCheckToString() {
final KllItemsSketch<String> sk = KllItemsSketch.newHeapInstance(20, Comparator.naturalOrder(), serDe);
int n = 21;
int digits = 3;
for (int i = 1; i <= n; i++) { sk.update(Util.intToFixedLengthString(i, digits)); }
println(sk.toString(true, true));
assertEquals(sk.getNumLevels(), 2);
assertEquals(sk.getMinItem()," 1");
assertEquals(sk.getMaxItem()," 21");
assertEquals(sk.getNumRetained(), 11);
final KllItemsSketch<String> sk2 = KllItemsSketch.newHeapInstance(20, Comparator.naturalOrder(), serDe);
n = 400;
digits = 3;
for (int i = 101; i <= n + 100; i++) { sk2.update(Util.intToFixedLengthString(i, digits)); }
println(LS + sk2.toString(true, true));
assertEquals(sk2.getNumLevels(), 5);
assertEquals(sk2.getMinItem(), "101");
assertEquals(sk2.getMaxItem(), "500");
assertEquals(sk2.getNumRetained(), 52);
sk2.merge(sk);
println(LS + sk2.toString(true, true));
assertEquals(sk2.getNumLevels(), 5);
assertEquals(sk2.getMinItem()," 1");
assertEquals(sk2.getMaxItem(), "500");
assertEquals(sk2.getNumRetained(), 56);
}
@Test //set static enablePrinting = true for visual checking
public void viewHeapCompactions() {
int k = 20;
int n = 108;
int digits = Util.numDigits(n);
int compaction = 0;
KllItemsSketch<String> sk = KllItemsSketch.newHeapInstance(k, Comparator.naturalOrder(), serDe);
for (int i = 1; i <= n; i++) {
sk.update(Util.intToFixedLengthString(i, digits));
if (sk.levelsArr[0] == 0) {
println(LS + "#<<< BEFORE COMPACTION # " + (++compaction) + " >>>");
println(sk.toString(true, true));
sk.update(Util.intToFixedLengthString(++i, digits));
println(LS + "#<<< AFTER COMPACTION # " + (compaction) + " >>>");
println(sk.toString(true, true));
assertEquals(sk.getTotalItemsArray()[sk.levelsArr[0]], Util.intToFixedLengthString(i, digits));
}
}
}
@Test //set static enablePrinting = true for visual checking
public void viewCompactionAndSortedView() {
final int n = 43;
final int digits = Util.numDigits(n);
KllItemsSketch<String> sk = KllItemsSketch.newHeapInstance(20, Comparator.naturalOrder(), serDe);
for (int i = 1; i <= n; i++) { sk.update(Util.intToFixedLengthString(i, digits)); }
println(sk.toString(true, true));
KllItemsSketchSortedView<String> sv = sk.getSortedView();
KllItemsSketchSortedViewIterator<String> itr = sv.iterator();
println("### SORTED VIEW");
printf("%12s%12s\n", "Value", "CumWeight");
while (itr.next()) {
String v = itr.getQuantile();
long wt = itr.getWeight();
printf("%12s%12d\n", v, wt);
}
}
@Test
public void checkGrowLevels() {
KllItemsSketch<String> sk = KllItemsSketch.newHeapInstance(20, Comparator.naturalOrder(), serDe);
final int n = 21;
final int digits = Util.numDigits(n);
for (int i = 1; i <= n; i++) { sk.update(Util.intToFixedLengthString(i, digits)); }
assertEquals(sk.getNumLevels(), 2);
assertEquals(sk.getTotalItemsArray().length, 33);
assertEquals(sk.getLevelsArray(sk.sketchStructure)[2], 33);
}
@Test //set static enablePrinting = true for visual checking
public void checkSketchInitializeItemsHeap() {
int k = 20; //don't change this
int n = 21;
final int digits = Util.numDigits(n);
KllItemsSketch<String> sk;
println("#### CASE: FLOAT FULL HEAP");
sk = KllItemsSketch.newHeapInstance(k, Comparator.naturalOrder(), serDe);
for (int i = 1; i <= n; i++) { sk.update(Util.intToFixedLengthString(i, digits)); }
println(sk.toString(true, true));
assertEquals(sk.getK(), k);
assertEquals(sk.getN(), n);
assertEquals(sk.getNumRetained(), 11);
assertFalse(sk.isEmpty());
assertTrue(sk.isEstimationMode());
assertEquals(sk.getMinK(), k);
assertEquals(sk.getTotalItemsArray().length, 33);
assertEquals(sk.getLevelsArray(sk.sketchStructure).length, 3);
assertEquals(sk.getMaxItem(), "21");
assertEquals(sk.getMinItem(), " 1");
assertEquals(sk.getNumLevels(), 2);
assertFalse(sk.isLevelZeroSorted());
println("#### CASE: FLOAT HEAP EMPTY");
sk = KllItemsSketch.newHeapInstance(k, Comparator.naturalOrder(), serDe);
println(sk.toString(true, true));
assertEquals(sk.getK(), k);
assertEquals(sk.getN(), 0);
assertEquals(sk.getNumRetained(), 0);
assertTrue(sk.isEmpty());
assertFalse(sk.isEstimationMode());
assertEquals(sk.getMinK(), k);
assertEquals(sk.getTotalItemsArray().length, 20);
assertEquals(sk.getLevelsArray(sk.sketchStructure).length, 2);
try { sk.getMaxItem(); fail(); } catch (SketchesArgumentException e) { }
try { sk.getMinItem(); fail(); } catch (SketchesArgumentException e) { }
assertEquals(sk.getNumLevels(), 1);
assertFalse(sk.isLevelZeroSorted());
println("#### CASE: FLOAT HEAP SINGLE");
sk = KllItemsSketch.newHeapInstance(k, Comparator.naturalOrder(), serDe);
sk.update("1");
println(sk.toString(true, true));
assertEquals(sk.getK(), k);
assertEquals(sk.getN(), 1);
assertEquals(sk.getNumRetained(), 1);
assertFalse(sk.isEmpty());
assertFalse(sk.isEstimationMode());
assertEquals(sk.getMinK(), k);
assertEquals(sk.getTotalItemsArray().length, 20);
assertEquals(sk.getLevelsArray(sk.sketchStructure).length, 2);
assertEquals(sk.getMaxItem(), "1");
assertEquals(sk.getMinItem(), "1");
assertEquals(sk.getNumLevels(), 1);
assertFalse(sk.isLevelZeroSorted());
}
@Test //set static enablePrinting = true for visual checking
public void checkSketchInitializeItemsHeapifyCompactMem() {
int k = 20; //don't change this
int n = 21;
final int digits = Util.numDigits(n);
KllItemsSketch<String> sk;
KllItemsSketch<String> sk2;
byte[] compBytes;
Memory mem;
println("#### CASE: FLOAT FULL HEAPIFIED FROM COMPACT");
sk2 = KllItemsSketch.newHeapInstance(k, Comparator.naturalOrder(), serDe);
for (int i = 1; i <= n; i++) { sk2.update(Util.intToFixedLengthString(i, digits)); }
println(sk2.toString(true, true));
compBytes = sk2.toByteArray();
mem = Memory.wrap(compBytes);
println(KllPreambleUtil.toString(mem, ITEMS_SKETCH, true, serDe));
sk = KllItemsSketch.heapify(mem, Comparator.naturalOrder(), serDe);
assertEquals(sk.getK(), k);
assertEquals(sk.getN(), k + 1);
assertEquals(sk.getNumRetained(), 11);
assertFalse(sk.isEmpty());
assertTrue(sk.isEstimationMode());
assertEquals(sk.getMinK(), k);
assertEquals(sk.getTotalItemsArray().length, 33);
assertEquals(sk.getLevelsArray(sk.sketchStructure).length, 3);
assertEquals(sk.getMaxItem(), "21");
assertEquals(sk.getMinItem(), " 1");
assertEquals(sk.getNumLevels(), 2);
assertFalse(sk.isLevelZeroSorted());
println("#### CASE: FLOAT EMPTY HEAPIFIED FROM COMPACT");
sk2 = KllItemsSketch.newHeapInstance(k, Comparator.naturalOrder(), serDe);
//println(sk.toString(true, true));
compBytes = sk2.toByteArray();
mem = Memory.wrap(compBytes);
println(KllPreambleUtil.toString(mem, ITEMS_SKETCH, true, serDe));
sk = KllItemsSketch.heapify(mem, Comparator.naturalOrder(), serDe);
assertEquals(sk.getK(), k);
assertEquals(sk.getN(), 0);
assertEquals(sk.getNumRetained(), 0);
assertTrue(sk.isEmpty());
assertFalse(sk.isEstimationMode());
assertEquals(sk.getMinK(), k);
assertEquals(sk.getTotalItemsArray().length, 20);
assertEquals(sk.getLevelsArray(sk.sketchStructure).length, 2);
try { sk.getMaxItem(); fail(); } catch (SketchesArgumentException e) { }
try { sk.getMinItem(); fail(); } catch (SketchesArgumentException e) { }
assertEquals(sk.getNumLevels(), 1);
assertFalse(sk.isLevelZeroSorted());
println("#### CASE: FLOAT SINGLE HEAPIFIED FROM COMPACT");
sk2 = KllItemsSketch.newHeapInstance(k, Comparator.naturalOrder(), serDe);
sk2.update("1");
//println(sk2.toString(true, true));
compBytes = sk2.toByteArray();
mem = Memory.wrap(compBytes);
println(KllPreambleUtil.toString(mem, ITEMS_SKETCH, true, serDe));
sk = KllItemsSketch.heapify(mem, Comparator.naturalOrder(), serDe);
assertEquals(sk.getK(), k);
assertEquals(sk.getN(), 1);
assertEquals(sk.getNumRetained(), 1);
assertFalse(sk.isEmpty());
assertFalse(sk.isEstimationMode());
assertEquals(sk.getMinK(), k);
assertEquals(sk.getTotalItemsArray().length, 20);
assertEquals(sk.getLevelsArray(sk.sketchStructure).length, 2);
assertEquals(sk.getMaxItem(), "1");
assertEquals(sk.getMinItem(), "1");
assertEquals(sk.getNumLevels(), 1);
assertFalse(sk.isLevelZeroSorted());
}
//public void checkSketchInitializeFloatHeapifyUpdatableMem() Not Supported
@Test //set static enablePrinting = true for visual checking
public void checkMemoryToStringItemsCompact() {
int k = 20; //don't change this
int n = 21;
final int digits = Util.numDigits(n);
KllItemsSketch<String> sk;
KllItemsSketch<String> sk2;
byte[] compBytes;
byte[] compBytes2;
Memory mem;
String s;
println("#### CASE: FLOAT FULL COMPACT");
sk = KllItemsSketch.newHeapInstance(k, Comparator.naturalOrder(), serDe);
for (int i = 1; i <= n; i++) { sk.update(Util.intToFixedLengthString(i, digits)); }
compBytes = sk.toByteArray();
mem = Memory.wrap(compBytes);
s = KllPreambleUtil.toString(mem, ITEMS_SKETCH, true, serDe);
println("step 1: sketch to byte[]/memory & analyze memory");
println(s);
sk2 = KllItemsSketch.heapify(mem, Comparator.naturalOrder(), serDe);
compBytes2 = sk2.toByteArray();
mem = Memory.wrap(compBytes2);
s = KllPreambleUtil.toString(mem, ITEMS_SKETCH, true, serDe);
println("step 2: memory to heap sketch, to byte[]/memory & analyze memory. Should match above");
println(s);
assertEquals(compBytes, compBytes2);
println("#### CASE: FLOAT EMPTY COMPACT");
sk = KllItemsSketch.newHeapInstance(k, Comparator.naturalOrder(), serDe);
compBytes = sk.toByteArray();
mem = Memory.wrap(compBytes);
s = KllPreambleUtil.toString(mem, ITEMS_SKETCH, true, serDe);
println("step 1: sketch to byte[]/memory & analyze memory");
println(s);
sk2 = KllItemsSketch.heapify(mem, Comparator.naturalOrder(), serDe);
compBytes2 = sk2.toByteArray();
mem = Memory.wrap(compBytes2);
s = KllPreambleUtil.toString(mem, ITEMS_SKETCH, true, serDe);
println("step 2: memory to heap sketch, to byte[]/memory & analyze memory. Should match above");
println(s);
assertEquals(compBytes, compBytes2);
println("#### CASE: FLOAT SINGLE COMPACT");
sk = KllItemsSketch.newHeapInstance(k, Comparator.naturalOrder(), serDe);
sk.update("1");
compBytes = sk.toByteArray();
mem = Memory.wrap(compBytes);
s = KllPreambleUtil.toString(mem, ITEMS_SKETCH, true, serDe);
println("step 1: sketch to byte[]/memory & analyze memory");
println(s);
sk2 = KllItemsSketch.heapify(mem, Comparator.naturalOrder(), serDe);
compBytes2 = sk2.toByteArray();
mem = Memory.wrap(compBytes2);
s = KllPreambleUtil.toString(mem, ITEMS_SKETCH, true, serDe);
println("step 2: memory to heap sketch, to byte[]/memory & analyze memory. Should match above");
println(s);
assertEquals(compBytes, compBytes2);
}
// public void checkMemoryToStringFloatUpdatable() Not Supported
// public void checkSimpleMerge() not supported
@Test
public void checkGetSingleItem() {
int k = 20;
KllItemsSketch<String> skHeap = KllItemsSketch.newHeapInstance(k, Comparator.naturalOrder(), serDe);
skHeap.update("1");
assertTrue(skHeap instanceof KllHeapItemsSketch);
assertEquals(skHeap.getSingleItem(), "1");
Memory srcMem = Memory.wrap(KllHelper.toByteArray(skHeap, true)); //true is ignored
KllItemsSketch<String> skDirect = KllItemsSketch.wrap(srcMem, Comparator.naturalOrder(), serDe);
assertTrue(skDirect instanceof KllDirectCompactItemsSketch);
assertEquals(skDirect.getSingleItem(), "1");
Memory srcMem2 = Memory.wrap(skHeap.toByteArray());
KllItemsSketch<String> skCompact = KllItemsSketch.wrap(srcMem2, Comparator.naturalOrder(), serDe);
assertTrue(skCompact instanceof KllDirectCompactItemsSketch);
assertEquals(skCompact.getSingleItem(), "1");
}
private final static boolean enablePrinting = false;
/**
* @param format the format
* @param args the args
*/
private static final void printf(final String format, final Object ...args) {
if (enablePrinting) { System.out.printf(format, args); }
}
/**
* @param o the Object to println
*/
private static final void println(final Object o) {
if (enablePrinting) { System.out.println(o.toString()); }
}
}