blob: 030c930b5adfd99bdef705944901fcc965ea71b8 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.datasketches.quantilescommon;
import static org.apache.datasketches.common.Util.LS;
import static org.apache.datasketches.quantilescommon.LongsAsOrderableStrings.digits;
import static org.apache.datasketches.quantilescommon.LongsAsOrderableStrings.getString;
import static org.apache.datasketches.quantilescommon.QuantileSearchCriteria.EXCLUSIVE;
import static org.apache.datasketches.quantilescommon.QuantileSearchCriteria.INCLUSIVE;
import static org.testng.Assert.assertEquals;
import static org.testng.Assert.fail;
import java.util.Comparator;
import org.apache.datasketches.common.ArrayOfStringsSerDe;
import org.apache.datasketches.common.SketchesArgumentException;
import org.apache.datasketches.kll.KllItemsSketch;
import org.apache.datasketches.quantiles.ItemsSketch;
import org.testng.annotations.Test;
/**
* This tests partition boundaries with both KllItemsSketch and classic ItemsSketch
*/
public class PartitionBoundariesTest {
private ArrayOfStringsSerDe serDe = new ArrayOfStringsSerDe();
private static String[] hdr = {"N", "MaxItem", "MinItem", "NumParts", "SearchCriteria"};
private static String hdrfmt = "%6s %10s %10s %10s %15s" + LS;
private static String hdrdfmt = "%6d %10s %10s %10d %15s" + LS;
private static String[] rowhdr = {"Row", "NormRanks", "NatRanks", "Boundaries", "DeltaItems"};
private static String rowhdrfmt = "%5s %12s %12s %12s %12s" + LS;
private static String rowdfmt = "%5d %12.8f %12d %12s %12d" + LS;
private static String[] rowhdr2 = {"Row", "NormRanks", "NatRanks", "Boundaries"};
private static String rowhdrfmt2= "%5s %12s %12s %12s" + LS;
private static String rowdfmt2 = "%5d %12.8f %12d %12s" + LS;
//@Test //visual check only. set enablePrinting = true to view.
public void checkSkewWithClassic() {
int n = 2050; //1000000;
int k = 1 << 15;
int n2 = 200;
int totalN = n + n2;
int numDigits = digits(totalN);
long v2 = 1000L;
QuantileSearchCriteria searchCrit = QuantileSearchCriteria.INCLUSIVE;
ItemsSketch<String> sk = ItemsSketch.getInstance(String.class,k, Comparator.naturalOrder());
for (long i = 1; i <= n; i++) { sk.update(getString(i, numDigits)); }
for (long i = 1; i <= n2; i++) { sk.update(getString(v2, numDigits)); }
int numParts = sk.getMaxPartitions(); //22
ItemsSketchSortedView<String> sv = sk.getSortedView();
GenericSortedViewIterator<String> itr = sv.iterator();
println("SORTED VIEW:");
printf(rowhdrfmt2, (Object[])rowhdr2);
int j = 0;
while (itr.next()) {
printf(rowdfmt2, j++, itr.getNormalizedRank(searchCrit), itr.getNaturalRank(searchCrit), itr.getQuantile());
}
GenericPartitionBoundaries<String> gpb = sv.getPartitionBoundariesFromNumParts(numParts, searchCrit);
int arrLen = gpb.getBoundaries().length;
double[] normRanks = gpb.getNormalizedRanks();
long[] natRanks = gpb.getNaturalRanks();
String[] boundaries = gpb.getBoundaries();
long[] numDeltaItems = gpb.getNumDeltaItems();
println("");
println("GET PARTITION BOUNDARIES:");
printf(hdrfmt, (Object[]) hdr);
printf(hdrdfmt, totalN, gpb.getMaxItem(), gpb.getMinItem(), numParts, searchCrit.toString());
println("");
printf(rowhdrfmt, (Object[]) rowhdr);
for (int i = 0; i < arrLen; i++) {
printf(rowdfmt, i, normRanks[i], natRanks[i], boundaries[i], numDeltaItems[i]);
}
}
//@Test //visual check only. set enablePrinting = true to view.
public void checkSkewWithKll() {
int n = 2050; //1_000_000;
int k = 1 << 15;
int n2 = 200;
int totalN = n + n2;
int numDigits = digits(totalN);
long v2 = 1000L;
QuantileSearchCriteria searchCrit = QuantileSearchCriteria.INCLUSIVE;
KllItemsSketch<String> sk = KllItemsSketch.newHeapInstance(k, Comparator.naturalOrder(), serDe);
for (long i = 1; i <= n; i++) { sk.update(getString(i, numDigits)); }
for (long i = 1; i <= n2; i++) { sk.update(getString(v2, numDigits)); }
int numParts = sk.getMaxPartitions(); //22
ItemsSketchSortedView<String> sv = sk.getSortedView();
GenericSortedViewIterator<String> itr = sv.iterator();
println("SORTED VIEW:");
printf(rowhdrfmt2, (Object[])rowhdr2);
int j = 0;
while (itr.next()) {
printf(rowdfmt2, j++, itr.getNormalizedRank(searchCrit), itr.getNaturalRank(searchCrit), itr.getQuantile());
}
GenericPartitionBoundaries<String> gpb = sv.getPartitionBoundariesFromNumParts(numParts, searchCrit);
int arrLen = gpb.getBoundaries().length;
double[] normRanks = gpb.getNormalizedRanks();
long[] natRanks = gpb.getNaturalRanks();
String[] boundaries = gpb.getBoundaries();
long[] numDeltaItems = gpb.getNumDeltaItems();
println("");
println("GET PARTITION BOUNDARIES:");
printf(hdrfmt, (Object[]) hdr);
printf(hdrdfmt, totalN, gpb.getMaxItem(), gpb.getMinItem(), numParts, searchCrit.toString());
println("");
printf(rowhdrfmt, (Object[]) rowhdr);
for (int i = 0; i < arrLen; i++) {
printf(rowdfmt, i, normRanks[i], natRanks[i], boundaries[i], numDeltaItems[i]);
}
}
@Test
public void getQuantilesVsPartitionBoundariesKll() {
final KllItemsSketch<String> sketch = KllItemsSketch.newHeapInstance(Comparator.naturalOrder(), serDe);
sketch.update("A");
sketch.update("B");
sketch.update("C");
sketch.update("D");
String[] quantiles1 = sketch.getQuantiles(new double[] {0.0, 0.5, 1.0}, EXCLUSIVE);
String[] quantiles2 = sketch.getPartitionBoundariesFromNumParts(2, EXCLUSIVE).getBoundaries();
assertEquals(quantiles1, quantiles2);
quantiles1 = sketch.getQuantiles(new double[] {0.0, 0.5, 1.0}, INCLUSIVE);
quantiles2 = sketch.getPartitionBoundariesFromNumParts(2, INCLUSIVE).getBoundaries();
assertEquals(quantiles1, quantiles2);
}
@Test
public void getQuantilesVsPartitionBoundariesClassic() {
final ItemsSketch<Integer> sketch = ItemsSketch.getInstance(Integer.class, Comparator.naturalOrder());
sketch.update(1);
sketch.update(2);
sketch.update(3);
sketch.update(4);
Integer[] quantiles1 = sketch.getQuantiles(new double[] {0.0, 0.5, 1.0}, EXCLUSIVE);
Integer[] quantiles2 = sketch.getPartitionBoundariesFromNumParts(2, EXCLUSIVE).getBoundaries();
assertEquals(quantiles1, quantiles2);
quantiles1 = sketch.getQuantiles(new double[] {0.0, 0.5, 1.0}, INCLUSIVE);
quantiles2 = sketch.getPartitionBoundariesFromNumParts(2, INCLUSIVE).getBoundaries();
assertEquals(quantiles1, quantiles2);
}
/**
* Because both Kll and Classic items sketches use the same Sorted View class.
* This test applies to both.
*/
@Test
public void checkSimpleEndsAdjustment() {
final String[] quantiles = {"2","4","6","7"};
final long[] cumWeights = {2, 4, 6, 8};
final long totalN = 8;
final Comparator<String> comparator = Comparator.naturalOrder();
final String maxItem = "8";
final String minItem = "1";
ItemsSketchSortedView<String> sv = new ItemsSketchSortedView<>(
quantiles, cumWeights, totalN, comparator, maxItem, minItem,
String.class, .01, 4);
GenericSortedViewIterator<String> itr = sv.iterator();
while (itr.next()) {
println(itr.getNaturalRank(INCLUSIVE) + ", " + itr.getQuantile(INCLUSIVE));
}
GenericPartitionBoundaries<String> gpb = sv.getPartitionBoundariesFromNumParts(2);
String[] boundaries = gpb.getBoundaries();
long[] natRanks = gpb.getNaturalRanks();
double[] normRanks = gpb.getNormalizedRanks();
long[] deltaItems = gpb.getNumDeltaItems();
int numParts = gpb.getNumPartitions();
String maxItm = gpb.getMaxItem();
String minItm = gpb.getMinItem();
assertEquals(boundaries, new String[] {"1","4","8"});
assertEquals(natRanks, new long[] {1,4,8});
assertEquals(normRanks, new double[] {.125,.5,1.0});
assertEquals(deltaItems, new long[] {0,4,4});
assertEquals(numParts, 2);
assertEquals(maxItm, "8");
assertEquals(minItm, "1");
}
@SuppressWarnings("unused")
@Test //For visual check, set enablePrinting = true to view.
public void checkSketchPartitionLimits() {
final long totalN = 2000; //1_000_000;
final Comparator<String> comparator = Comparator.naturalOrder();
final ArrayOfStringsSerDe serDe = new ArrayOfStringsSerDe();
final int k = 1 << 15;
final KllItemsSketch<String> sk = KllItemsSketch.newHeapInstance(k, comparator, serDe);
final int d = digits(totalN);
for (int i = 1; i <= totalN; i++) {
sk.update(getString(i, d));
}
//***
final int numRet = sk.getNumRetained();
println("NumRetained: " + numRet + " /2: " + (numRet / 2));
final double eps = sk.getNormalizedRankError(true);
printf("NormRankErr: %10.6f 1/eps: %10.3f" + LS, eps, 1/eps);
//***
//this should pass
final int goodNumPartsRequest = sk.getMaxPartitions();
println("Good numPartsRequest " + goodNumPartsRequest);
GenericPartitionBoundaries<String> gpb = sk.getPartitionBoundariesFromNumParts(goodNumPartsRequest);
//this should fail
try {
final int badNumPartsRequest = goodNumPartsRequest + 1;
println("Bad numPartsRequest " + badNumPartsRequest);
gpb = sk.getPartitionBoundariesFromNumParts(badNumPartsRequest);
fail("Bad numPartsRequest should have failed. " + badNumPartsRequest);
} catch (SketchesArgumentException e) { } //OK
}
@SuppressWarnings("unused")
@Test //For visual check, set enablePrinting = true to view.
public void checkSketchPartitionLimits2() {
final long totalN = 2000; //1_000_000;
final Comparator<String> comparator = Comparator.naturalOrder();
final ArrayOfStringsSerDe serDe = new ArrayOfStringsSerDe();
final int k = 1 << 15;
final KllItemsSketch<String> sk = KllItemsSketch.newHeapInstance(k, comparator, serDe);
final int d = digits(totalN);
for (int i = 1; i <= totalN; i++) {
sk.update(getString(i, d));
}
final double eps = sk.getNormalizedRankError(true);
printf("NormRankErr: %10.6f 1/eps: %10.3f" + LS, eps, 1/eps);
println("N: " + sk.getN());
println("Max Parts: " + sk.getMaxPartitions());
//this should pass
final long goodPartSizeRequest= sk.getMinPartitionSizeItems();
println("Good partSizeRequest " + goodPartSizeRequest);
GenericPartitionBoundaries<String> gpb = sk.getPartitionBoundariesFromPartSize(goodPartSizeRequest);
//this should fail
try {
final long badPartSizeRequest = goodPartSizeRequest - 1;
println("Bad partSizeRequest " + badPartSizeRequest);
} catch (SketchesArgumentException e) { } //OK
}
@Test
public void printlnTest() {
println("PRINTING: " + this.getClass().getName());
}
private final static boolean enablePrinting = false;
/**
* @param format the format
* @param args the args
*/
static final void printf(final String format, final Object ...args) {
if (enablePrinting) { System.out.printf(format, args); }
}
/**
* @param o the Object to println
*/
static final void println(final Object o) {
if (enablePrinting) { System.out.println(o.toString()); }
}
}