blob: b8015e1cc763324c6b1484b431c75af2b4a99a72 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.datasketches.hll;
import static org.apache.datasketches.hll.TgtHllType.HLL_4;
import static org.apache.datasketches.hll.TgtHllType.HLL_6;
import static org.apache.datasketches.hll.TgtHllType.HLL_8;
import static java.lang.Math.min;
import static org.testng.Assert.assertEquals;
import static org.testng.Assert.assertFalse;
import static org.testng.Assert.assertTrue;
import static org.testng.Assert.fail;
import org.testng.annotations.Test;
import org.apache.datasketches.memory.Memory;
import org.apache.datasketches.memory.WritableMemory;
import org.apache.datasketches.SketchesArgumentException;
/**
* @author Lee Rhodes
*/
@SuppressWarnings("javadoc")
public class DirectUnionTest {
static final String LS = System.getProperty("line.separator");
static final int[] nArr = new int[] {1, 3, 10, 30, 100, 300, 1000, 3000, 10000, 30000};
// n1,... lgK,... tgtHll, Mode Ooo Est
static final String hdrFmt =
"%6s%6s%6s" + "%8s%5s%5s%5s" + "%7s%6s" + "%7s%6s%6s" +"%3s%2s%2s"+ "%13s%12s";
static final String hdr = String.format(hdrFmt,
"n1", "n2", "tot",
"lgMaxK", "lgK1", "lgK2", "lgKR",
"tgt1", "tgt2",
"Mode1", "Mode2", "ModeR",
"1", "2", "R",
"Est", "Err%");
/**
* The task here is to check the transition boundaries as the sketch morphs between LIST to
* SET to HLL modes. The transition points vary as a function of lgConfigK. In addition,
* this checks that the union operation is operating properly based on the order the
* sketches are presented to the union.
*/
@Test
public void checkUnions() {
//HLL_4: t=0, HLL_6: t=1, HLL_8: t=2
int t1 = 2; //type = HLL_8
int t2 = 2;
int rt = 2; //result type
println("TgtR: " + TgtHllType.values()[rt].toString());
int lgK1 = 7;
int lgK2 = 7;
int lgMaxK = 7;
int n1 = 7;
int n2 = 7;
basicUnion(n1, n2, lgK1, lgK2, lgMaxK, t1, t2, rt);
n1 = 8;
n2 = 7;
basicUnion(n1, n2, lgK1, lgK2, lgMaxK, t1, t2, rt);
n1 = 7;
n2 = 8;
basicUnion(n1, n2, lgK1, lgK2, lgMaxK, t1, t2, rt);
n1 = 8;
n2 = 8;
basicUnion(n1, n2, lgK1, lgK2, lgMaxK, t1, t2, rt);
n1 = 7;
n2 = 14;
basicUnion(n1, n2, lgK1, lgK2, lgMaxK, t1, t2, rt);
println("++END BASE GROUP++");
int i = 0;
for (i = 7; i <= 13; i++)
{
lgK1 = i;
lgK2 = i;
lgMaxK = i;
{
n1 = ((1 << (i - 3)) * 3)/4; //compute the transition point
n2 = n1;
basicUnion(n1, n2, lgK1, lgK2, lgMaxK, t1, t2, rt);
n1 += 2;
basicUnion(n1, n2, lgK1, lgK2, lgMaxK, t1, t2, rt);
n1 -= 2;
n2 += 2;
basicUnion(n1, n2, lgK1, lgK2, lgMaxK, t1, t2, rt);
n1 += 2;
basicUnion(n1, n2, lgK1, lgK2, lgMaxK, t1, t2, rt);
}
println("--END MINOR GROUP--");
lgK1 = i;
lgK2 = i + 1;
lgMaxK = i;
{
n1 = ((1 << (i - 3)) * 3)/4;
n2 = n1;
basicUnion(n1, n2, lgK1, lgK2, lgMaxK, t1, t2, rt);
n1 += 2;
basicUnion(n1, n2, lgK1, lgK2, lgMaxK, t1, t2, rt);
n1 -= 2;
n2 += 2;
basicUnion(n1, n2, lgK1, lgK2, lgMaxK, t1, t2, rt);
n1 += 2;
basicUnion(n1, n2, lgK1, lgK2, lgMaxK, t1, t2, rt);
}
println("--END MINOR GROUP--");
lgK1 = i + 1;
lgK2 = i;
lgMaxK = i;
{
n1 = ((1 << (i - 3)) * 3)/4;
n2 = n1;
basicUnion(n1, n2, lgK1, lgK2, lgMaxK, t1, t2, rt);
n1 += 2;
basicUnion(n1, n2, lgK1, lgK2, lgMaxK, t1, t2, rt);
n1 -= 2;
n2 += 2;
basicUnion(n1, n2, lgK1, lgK2, lgMaxK, t1, t2, rt);
n1 += 2;
basicUnion(n1, n2, lgK1, lgK2, lgMaxK, t1, t2, rt);
}
println("--END MINOR GROUP--");
lgK1 = i + 1;
lgK2 = i + 1;
lgMaxK = i;
{
n1 = ((1 << (i - 3)) * 3)/4;
n2 = n1;
basicUnion(n1, n2, lgK1, lgK2, lgMaxK, t1, t2, rt);
n1 += 2;
basicUnion(n1, n2, lgK1, lgK2, lgMaxK, t1, t2, rt);
n1 -= 2;
n2 += 2;
basicUnion(n1, n2, lgK1, lgK2, lgMaxK, t1, t2, rt);
n1 += 2;
basicUnion(n1, n2, lgK1, lgK2, lgMaxK, t1, t2, rt);
}
println("++END MAJOR GROUP++");
} //End for
}
@Test
public void check() { //n1=8, n2=7, lgK1=lgK2=lgMaxK=7, all HLL_8
basicUnion(8, 7, 7, 7, 7, 2, 2, 2);
}
private static void basicUnion(int n1, int n2, int lgK1, int lgK2,
int lgMaxK, int t1, int t2, int rt) {
long v = 0;
int tot = n1 + n2;
TgtHllType type1 = TgtHllType.values()[t1];
String t1str = type1.toString();
TgtHllType type2 = TgtHllType.values()[t2];
String t2str = type2.toString();
TgtHllType resultType = TgtHllType.values()[rt];
//String rtStr = resultType.toString();
HllSketch h1 = new HllSketch(lgK1, type1);
HllSketch h2 = new HllSketch(lgK2, type2);
int lgControlK = min(min(lgK1, lgK2), lgMaxK); //min of all 3
HllSketch control = new HllSketch(lgControlK, resultType);
String dataFmt = "%6d%6d%6d," + "%7d%5d%5d%5d," + "%6s%6s," + "%6s%6s%6s,"
+"%2s%2s%2s," + "%12f%12f%%";
for (long i = 0; i < n1; i++) {
h1.update(v + i);
control.update(v + i);
}
v += n1;
for (long i = 0; i < n2; i++) {
h2.update(v + i);
control.update(v + i);
}
v += n2;
String h1SketchStr = ("H1 SKETCH: \n" + h1.toString());
String h2SketchStr = ("H2 SKETCH: \n" + h2.toString());
Union union = newUnion(lgMaxK);
union.update(h1);
String uH1SketchStr = ("Union after H1: \n" + union.getResult(resultType).toString());
//println(uH1SketchStr);
union.update(h2);
HllSketch result = union.getResult(resultType);
int lgKR = result.getLgConfigK();
String uSketchStr =("Union after H2: \n" + result.toString());
double uEst = result.getEstimate();
double uUb = result.getUpperBound(2);
double uLb = result.getLowerBound(2);
double rerr = ((uEst/tot) - 1.0) * 100;
String mode1 = h1.getCurMode().toString();
String mode2 = h2.getCurMode().toString();
String modeR = result.getCurMode().toString();
//Control
String cSketchStr = ("CONTROL SKETCH: \n" + control.toString());
double controlEst = control.getEstimate();
double controlUb = control.getUpperBound(2);
double controlLb = control.getLowerBound(2);
String h1ooo = h1.isOutOfOrder() ? "T" : "F";
String h2ooo = h2.isOutOfOrder() ? "T" : "F";
String resultooo = result.isOutOfOrder() ? "T" : "F";
String row = String.format(dataFmt,
n1, n2, tot,
lgMaxK, lgK1, lgK2, lgKR,
t1str, t2str,
mode1, mode2, modeR,
h1ooo, h2ooo, resultooo,
uEst, rerr);
println(h1SketchStr);
println(h2SketchStr);
println(uH1SketchStr);
println(uSketchStr);
println(cSketchStr);
println(hdr);
println(row);
assertTrue((controlUb - controlEst) >= 0);
assertTrue((uUb - uEst) >= 0);
assertTrue((controlEst - controlLb) >= 0);
assertTrue((uEst -uLb) >= 0);
}
@Test
public void checkToFromUnion1() {
for (int i = 0; i < 10; i++) {
int n = nArr[i];
for (int lgK = 4; lgK <= 13; lgK++) {
toFrom1(lgK, HLL_4, n);
toFrom1(lgK, HLL_6, n);
toFrom1(lgK, HLL_8, n);
}
println("=======");
}
}
private static void toFrom1(int lgK, TgtHllType tgtHllType, int n) {
Union srcU = newUnion(lgK);
HllSketch srcSk = new HllSketch(lgK, tgtHllType);
for (int i = 0; i < n; i++) {
srcSk.update(i);
}
println("n: " + n + ", lgK: " + lgK + ", type: " + tgtHllType);
//printSketch(src, "SRC");
srcU.update(srcSk);
byte[] byteArr = srcU.toCompactByteArray();
Memory mem = Memory.wrap(byteArr);
Union dstU = Union.heapify(mem);
assertEquals(dstU.getEstimate(), srcU.getEstimate(), 0.0);
}
@Test
public void checkToFromUnion2() {
for (int i = 0; i < 10; i++) {
int n = nArr[i];
for (int lgK = 4; lgK <= 13; lgK++) {
toFrom2(lgK, HLL_4, n);
toFrom2(lgK, HLL_6, n);
toFrom2(lgK, HLL_8, n);
}
println("=======");
}
}
private static void toFrom2(int lgK, TgtHllType tgtHllType, int n) {
Union srcU = newUnion(lgK);
HllSketch srcSk = new HllSketch(lgK, tgtHllType);
for (int i = 0; i < n; i++) {
srcSk.update(i);
}
println("n: " + n + ", lgK: " + lgK + ", type: " + tgtHllType);
//printSketch(src, "SRC");
srcU.update(srcSk);
byte[] byteArr = srcU.toCompactByteArray();
Union dstU = Union.heapify(byteArr);
assertEquals(dstU.getEstimate(), srcU.getEstimate(), 0.0);
}
@Test
public void checkCompositeEst() {
Union u = newUnion(12);
assertEquals(u.getCompositeEstimate(), 0, .03);
for (int i = 1; i <= 15; i++) { u.update(i); }
assertEquals(u.getCompositeEstimate(), 15, 15 *.03);
for (int i = 15; i <= 1000; i++) { u.update(i); }
assertEquals(u.getCompositeEstimate(), 1000, 1000 * .03);
}
@SuppressWarnings("unused")
@Test
public void checkMisc() {
try {
Union u = newUnion(HllUtil.MIN_LOG_K - 1);
fail();
} catch (SketchesArgumentException e) {
//expected
}
try {
Union u = newUnion(HllUtil.MAX_LOG_K + 1);
fail();
} catch (SketchesArgumentException e) {
//expected
}
Union u = newUnion(7);
HllSketch sk = u.getResult();
assertTrue(sk.isEmpty());
}
@Test
public void checkHeapify() {
Union u = newUnion(16);
for (int i = 0; i < (1 << 20); i++) {
u.update(i);
}
double est1 = u.getEstimate();
byte[] byteArray = u.toUpdatableByteArray();
Union u2 = Union.heapify(byteArray);
assertEquals(u2.getEstimate(), est1, 0.0);
}
@Test //for lgK <= 12
public void checkUbLb() {
int lgK = 4;
int n = 1 << 20;
boolean oooFlag = false;
println("LgK="+lgK+", UB3, " + ((getBound(lgK, true, oooFlag, 3, n) / n) - 1));
println("LgK="+lgK+", UB2, " + ((getBound(lgK, true, oooFlag, 2, n) / n) - 1));
println("LgK="+lgK+", UB1, " + ((getBound(lgK, true, oooFlag, 1, n) / n) - 1));
println("LgK="+lgK+", LB1, " + ((getBound(lgK, false, oooFlag, 1, n) / n) - 1));
println("LgK="+lgK+", LB2, " + ((getBound(lgK, false, oooFlag, 2, n) / n) - 1));
println("LgK="+lgK+", LB3, " + ((getBound(lgK, false, oooFlag, 3, n) / n) - 1));
}
@Test
public void checkEmptyCouponMisc() {
int lgK = 8;
Union union = newUnion(lgK);
for (int i = 0; i < 20; i++) { union.update(i); } //SET mode
union.couponUpdate(0);
assertEquals(union.getEstimate(), 20.0, 0.001);
assertEquals(union.getTgtHllType(), TgtHllType.HLL_8);
assertTrue(union.isMemory());
assertFalse(union.isOffHeap());
int bytes = union.getUpdatableSerializationBytes();
assertTrue(bytes <= Union.getMaxSerializationBytes(lgK));
assertFalse(union.isCompact());
}
@Test
public void checkUnionWithWrap() {
int lgConfigK = 4;
TgtHllType type = TgtHllType.HLL_4;
int n = 2;
HllSketch sk = new HllSketch(lgConfigK, type);
for (int i = 0; i < n; i++) { sk.update(i); }
double est = sk.getEstimate();
byte[] skByteArr = sk.toCompactByteArray();
HllSketch sk2 = HllSketch.wrap(Memory.wrap(skByteArr));
assertEquals(sk2.getEstimate(), est, 0.0);
Union union = newUnion(lgConfigK);
union.update(HllSketch.wrap(Memory.wrap(skByteArr)));
assertEquals(union.getEstimate(), est, 0.0);
}
@Test
public void checkUnionWithWrap2() {
int lgConfigK = 10;
int n = 128;
HllSketch sk1 = new HllSketch(lgConfigK);
for (int i = 0; i < n; i++) { sk1.update(i); }
double est1 = sk1.getEstimate();
byte[] byteArr1 = sk1.toCompactByteArray();
Union union = newUnion(lgConfigK);
union.update(HllSketch.wrap(Memory.wrap(byteArr1)));
double est2 = union.getEstimate();
assertEquals(est2, est1);
}
@Test
public void checkWritableWrap() {
int lgConfigK = 10;
int n = 128;
Union union = newUnion(lgConfigK);
for (int i = 0; i < n; i++) { union.update(i); }
double est = union.getEstimate();
Union union2 = Union.writableWrap(WritableMemory.writableWrap(union.toUpdatableByteArray()));
double est2 = union2.getEstimate();
assertEquals(est2, est, 0.0);
}
@Test(expectedExceptions = SketchesArgumentException.class)
public void checkWritableWrapThrows() {
int lgConfigK = 10;
int n = 128;
HllSketch sk = new HllSketch(lgConfigK, HLL_6);
for (int i = 0; i < n; i++) {sk.update(i); }
Union.writableWrap(WritableMemory.writableWrap(sk.toUpdatableByteArray()));
}
private static Union newUnion(int lgK) {
int bytes = HllSketch.getMaxUpdatableSerializationBytes(lgK, TgtHllType.HLL_8);
WritableMemory wmem = WritableMemory.allocate(bytes);
return new Union(lgK, wmem);
}
private static double getBound(int lgK, boolean ub, boolean oooFlag, int numStdDev, double est) {
double re = RelativeErrorTables.getRelErr(ub, oooFlag, lgK, numStdDev);
return est / (1.0 + re);
}
@Test
public void printlnTest() {
println("PRINTING: "+this.getClass().getName());
}
/**
* @param s value to print
*/
static void println(String s) {
print(s + LS);
}
/**
* @param s value to print
*/
static void print(String s) {
//System.out.print(s); //disable here
}
}