| /* |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, |
| * software distributed under the License is distributed on an |
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| * KIND, either express or implied. See the License for the |
| * specific language governing permissions and limitations |
| * under the License. |
| */ |
| |
| package org.apache.datasketches.hll; |
| |
| import static org.apache.datasketches.hll.HllSketch.getMaxUpdatableSerializationBytes; |
| import static org.apache.datasketches.hll.HllUtil.LG_AUX_ARR_INTS; |
| import static org.apache.datasketches.hll.HllUtil.LG_INIT_LIST_SIZE; |
| import static org.apache.datasketches.hll.HllUtil.LG_INIT_SET_SIZE; |
| import static org.apache.datasketches.hll.PreambleUtil.HASH_SET_INT_ARR_START; |
| import static org.apache.datasketches.hll.PreambleUtil.HLL_BYTE_ARR_START; |
| import static org.apache.datasketches.hll.PreambleUtil.LIST_INT_ARR_START; |
| import static org.apache.datasketches.hll.TgtHllType.HLL_4; |
| import static org.apache.datasketches.hll.TgtHllType.HLL_6; |
| import static org.apache.datasketches.hll.TgtHllType.HLL_8; |
| import static org.testng.Assert.assertEquals; |
| import static org.testng.Assert.assertFalse; |
| import static org.testng.Assert.assertNull; |
| import static org.testng.Assert.assertTrue; |
| import static org.testng.Assert.fail; |
| |
| import org.apache.datasketches.SketchesArgumentException; |
| import org.apache.datasketches.memory.Memory; |
| import org.apache.datasketches.memory.WritableMemory; |
| import org.testng.annotations.Test; |
| |
| /** |
| * @author Lee Rhodes |
| */ |
| @SuppressWarnings("javadoc") |
| public class HllSketchTest { |
| |
| @Test |
| public void checkCopies() { |
| runCheckCopy(14, HLL_4, null); |
| runCheckCopy(8, HLL_6, null); |
| runCheckCopy(8, HLL_8, null); |
| |
| int bytes = getMaxUpdatableSerializationBytes(14, TgtHllType.HLL_8); |
| WritableMemory wmem = WritableMemory.allocate(bytes); |
| |
| runCheckCopy(14, HLL_4, wmem); |
| runCheckCopy(8, HLL_6, wmem); |
| runCheckCopy(8, HLL_8, wmem); |
| } |
| |
| private static void runCheckCopy(int lgConfigK, TgtHllType tgtHllType, WritableMemory wmem) { |
| HllSketch sk; |
| if (wmem == null) { //heap |
| sk = new HllSketch(lgConfigK, tgtHllType); |
| } else { //direct |
| sk = new HllSketch(lgConfigK, tgtHllType, wmem); |
| } |
| |
| for (int i = 0; i < 7; i++) { |
| sk.update(i); |
| } |
| assertEquals(sk.getCurMode(), CurMode.LIST); |
| |
| HllSketch skCopy = sk.copy(); |
| assertEquals(skCopy.getCurMode(), CurMode.LIST); |
| HllSketchImpl impl1 = sk.hllSketchImpl; |
| |
| HllSketchImpl impl2 = skCopy.hllSketchImpl; |
| AbstractCoupons absCoupons1 = (AbstractCoupons) sk.hllSketchImpl; |
| AbstractCoupons absCoupons2 = (AbstractCoupons) skCopy.hllSketchImpl; |
| assertEquals(absCoupons1.getCouponCount(), absCoupons2.getCouponCount()); |
| assertEquals(impl1.getEstimate(), impl2.getEstimate(), 0.0); |
| assertFalse(impl1 == impl2); |
| |
| for (int i = 7; i < 24; i++) { |
| sk.update(i); |
| } |
| assertEquals(sk.getCurMode(), CurMode.SET); |
| |
| skCopy = sk.copy(); |
| assertEquals(skCopy.getCurMode(), CurMode.SET); |
| impl1 = sk.hllSketchImpl; |
| |
| impl2 = skCopy.hllSketchImpl; |
| absCoupons1 = (AbstractCoupons) sk.hllSketchImpl; |
| absCoupons2 = (AbstractCoupons) skCopy.hllSketchImpl; |
| assertEquals(absCoupons1.getCouponCount(), absCoupons2.getCouponCount()); |
| assertEquals(impl1.getEstimate(), impl2.getEstimate(), 0.0); |
| assertFalse(impl1 == impl2); |
| final int u = (sk.getTgtHllType() == TgtHllType.HLL_4) ? 100000 : 25; |
| for (int i = 24; i < u; i++) { |
| sk.update(i); |
| } |
| sk.getCompactSerializationBytes(); |
| assertEquals(sk.getCurMode(), CurMode.HLL); |
| |
| skCopy = sk.copy(); |
| assertEquals(skCopy.getCurMode(), CurMode.HLL); |
| impl1 = sk.hllSketchImpl; |
| |
| impl2 = skCopy.hllSketchImpl; |
| assertEquals(impl1.getEstimate(), impl2.getEstimate(), 0.0); |
| assertFalse(impl1 == impl2); |
| } |
| |
| @Test |
| public void checkCopyAs() { |
| copyAs(HLL_4, HLL_4, false); |
| copyAs(HLL_4, HLL_6, false); |
| copyAs(HLL_4, HLL_8, false); |
| copyAs(HLL_6, HLL_4, false); |
| copyAs(HLL_6, HLL_6, false); |
| copyAs(HLL_6, HLL_8, false); |
| copyAs(HLL_8, HLL_4, false); |
| copyAs(HLL_8, HLL_6, false); |
| copyAs(HLL_8, HLL_8, false); |
| copyAs(HLL_4, HLL_4, true); |
| copyAs(HLL_4, HLL_6, true); |
| copyAs(HLL_4, HLL_8, true); |
| copyAs(HLL_6, HLL_4, true); |
| copyAs(HLL_6, HLL_6, true); |
| copyAs(HLL_6, HLL_8, true); |
| copyAs(HLL_8, HLL_4, true); |
| copyAs(HLL_8, HLL_6, true); |
| copyAs(HLL_8, HLL_8, true); |
| } |
| |
| private static void copyAs(TgtHllType srcType, TgtHllType dstType, boolean direct) { |
| int lgK = 8; |
| int n1 = 7; |
| int n2 = 24; |
| int n3 = 1000; |
| int base = 0; |
| int bytes = getMaxUpdatableSerializationBytes(lgK, srcType); |
| WritableMemory wmem = WritableMemory.allocate(bytes); |
| |
| HllSketch src = (direct) ? new HllSketch(lgK, srcType, wmem) : new HllSketch(lgK, srcType); |
| for (int i = 0; i < n1; i++) { src.update(i + base); } |
| HllSketch dst = src.copyAs(dstType); |
| assertEquals(dst.getEstimate(), src.getEstimate(), 0.0); |
| |
| for (int i = n1; i < n2; i++) { src.update(i); } |
| dst = src.copyAs(dstType); |
| assertEquals(dst.getEstimate(), src.getEstimate(), 0.0); |
| |
| for (int i = n2; i < n3; i++) { src.update(i); } |
| dst = src.copyAs(dstType); |
| assertEquals(dst.getEstimate(), src.getEstimate(), 0.0); |
| } |
| |
| @Test |
| public void checkMisc1() { |
| misc(false); |
| misc(true); |
| } |
| |
| private static void misc(boolean direct) { |
| int lgConfigK = 8; |
| TgtHllType srcType = TgtHllType.HLL_8; |
| int bytes = getMaxUpdatableSerializationBytes(lgConfigK, srcType); |
| WritableMemory wmem = WritableMemory.allocate(bytes); |
| |
| HllSketch sk = (direct) |
| ? new HllSketch(lgConfigK, srcType, wmem) : new HllSketch(lgConfigK, srcType); |
| |
| for (int i = 0; i < 7; i++) { sk.update(i); } //LIST |
| AbstractCoupons absCoupons = (AbstractCoupons) sk.hllSketchImpl; |
| assertEquals(absCoupons.getCouponCount(), 7); |
| assertEquals(sk.getCompactSerializationBytes(), 36); |
| assertEquals(sk.getUpdatableSerializationBytes(), 40); |
| |
| for (int i = 7; i < 24; i++) { sk.update(i); } //SET |
| absCoupons = (AbstractCoupons) sk.hllSketchImpl; |
| assertEquals(absCoupons.getCouponCount(), 24); |
| assertEquals(sk.getCompactSerializationBytes(), 108); |
| assertEquals(sk.getUpdatableSerializationBytes(), 140); |
| |
| sk.update(24); //HLL |
| AbstractHllArray absHll = (AbstractHllArray) sk.hllSketchImpl; |
| assertNull(absHll.getAuxIterator()); |
| assertEquals(absHll.getCurMin(), 0); |
| assertEquals(absHll.getHipAccum(), 25.0, 25 * .02); |
| assertTrue(absHll.getNumAtCurMin() >= 0); |
| assertEquals(sk.getUpdatableSerializationBytes(), 40 + 256); |
| assertEquals(absHll.getMemDataStart(), 40); |
| assertEquals(absHll.getPreInts(), 10); |
| |
| |
| final int hllBytes = PreambleUtil.HLL_BYTE_ARR_START + (1 << lgConfigK); |
| assertEquals(sk.getCompactSerializationBytes(), hllBytes); |
| assertEquals(getMaxUpdatableSerializationBytes(lgConfigK, TgtHllType.HLL_8), hllBytes); |
| } |
| |
| @Test(expectedExceptions = SketchesArgumentException.class) |
| public void checkNumStdDev() { |
| HllUtil.checkNumStdDev(0); |
| } |
| |
| @Test |
| public void checkSerSizes() { |
| checkSerSizes(8, TgtHllType.HLL_8, false); |
| checkSerSizes(8, TgtHllType.HLL_8, true); |
| checkSerSizes(8, TgtHllType.HLL_6, false); |
| checkSerSizes(8, TgtHllType.HLL_6, true); |
| checkSerSizes(8, TgtHllType.HLL_4, false); |
| checkSerSizes(8, TgtHllType.HLL_4, true); |
| } |
| |
| private static void checkSerSizes(int lgConfigK, TgtHllType tgtHllType, boolean direct) { |
| int bytes = getMaxUpdatableSerializationBytes(lgConfigK, tgtHllType); |
| WritableMemory wmem = WritableMemory.allocate(bytes); |
| HllSketch sk = (direct) |
| ? new HllSketch(lgConfigK, tgtHllType, wmem) : new HllSketch(lgConfigK, tgtHllType); |
| int i; |
| |
| //LIST |
| for (i = 0; i < 7; i++) { sk.update(i); } |
| int expected = LIST_INT_ARR_START + (i << 2); |
| assertEquals(sk.getCompactSerializationBytes(), expected); |
| expected = LIST_INT_ARR_START + (4 << LG_INIT_LIST_SIZE); |
| assertEquals(sk.getUpdatableSerializationBytes(), expected); |
| |
| //SET |
| for (i = 7; i < 24; i++) { sk.update(i); } |
| expected = HASH_SET_INT_ARR_START + (i << 2); |
| assertEquals(sk.getCompactSerializationBytes(), expected); |
| expected = HASH_SET_INT_ARR_START + (4 << LG_INIT_SET_SIZE); |
| assertEquals(sk.getUpdatableSerializationBytes(), expected); |
| |
| //HLL |
| sk.update(i); |
| assertEquals(sk.getCurMode(), CurMode.HLL); |
| AbstractHllArray absHll = (AbstractHllArray) sk.hllSketchImpl; |
| |
| int auxCountBytes = 0; |
| int auxArrBytes = 0; |
| if (absHll.tgtHllType == HLL_4) { |
| AuxHashMap auxMap = absHll.getAuxHashMap(); |
| if (auxMap != null) { |
| auxCountBytes = auxMap.getAuxCount() << 2; |
| auxArrBytes = 4 << auxMap.getLgAuxArrInts(); |
| } else { |
| auxArrBytes = 4 << LG_AUX_ARR_INTS[lgConfigK]; |
| } |
| } |
| int hllArrBytes = absHll.getHllByteArrBytes(); |
| expected = HLL_BYTE_ARR_START + hllArrBytes + auxCountBytes; |
| assertEquals(sk.getCompactSerializationBytes(), expected); |
| expected = HLL_BYTE_ARR_START + hllArrBytes + auxArrBytes; |
| assertEquals(sk.getUpdatableSerializationBytes(), expected); |
| int fullAuxBytes = (tgtHllType == TgtHllType.HLL_4) ? (4 << LG_AUX_ARR_INTS[lgConfigK]) : 0; |
| expected = HLL_BYTE_ARR_START + hllArrBytes + fullAuxBytes; |
| assertEquals(getMaxUpdatableSerializationBytes(lgConfigK, tgtHllType), expected); |
| } |
| |
| @SuppressWarnings("unused") |
| @Test |
| public void checkConfigKLimits() { |
| try { |
| HllSketch sk = new HllSketch(HllUtil.MIN_LOG_K - 1); |
| fail(); |
| } catch (SketchesArgumentException e) { |
| //expected |
| } |
| try { |
| HllSketch sk = new HllSketch(HllUtil.MAX_LOG_K + 1); |
| fail(); |
| } catch (SketchesArgumentException e) { |
| //expected |
| } |
| } |
| |
| @Test |
| public void exerciseToStringDetails() { |
| HllSketch sk = new HllSketch(15, TgtHllType.HLL_4); |
| for (int i = 0; i < 25; i++) { sk.update(i); } |
| println(sk.toString(false, true, true, true)); //SET mode |
| for (int i = 25; i < (1 << 12); i++) { sk.update(i); } |
| println(sk.toString(false, true, true, true)); //HLL mode no Aux |
| for (int i = (1 << 12); i < (1 << 15); i++) { sk.update(i); } //Aux with exceptions |
| println(sk.toString(false, true, true, true)); |
| println(sk.toString(false, true, true, false)); |
| println(sk.toString(false, true, true)); |
| sk = new HllSketch(8, TgtHllType.HLL_6); |
| for (int i = 0; i < 25; i++) { sk.update(i); } |
| println(sk.toString(false, true, true, true)); |
| } |
| |
| @SuppressWarnings("unused") |
| @Test |
| public void checkMemoryNotLargeEnough() { |
| int bytes = getMaxUpdatableSerializationBytes(8, TgtHllType.HLL_8); |
| WritableMemory wmem = WritableMemory.allocate(bytes -1); |
| try { |
| HllSketch sk = new HllSketch(8, TgtHllType.HLL_8, wmem); |
| fail(); |
| } catch (SketchesArgumentException e) { |
| //OK |
| } |
| } |
| |
| @Test |
| public void checkEmptyCoupon() { |
| int lgK = 8; |
| TgtHllType type = TgtHllType.HLL_8; |
| HllSketch sk = new HllSketch(lgK, type); |
| for (int i = 0; i < 20; i++) { sk.update(i); } //SET mode |
| sk.couponUpdate(0); |
| assertEquals(sk.getEstimate(), 20.0, 0.001); |
| } |
| |
| @Test |
| public void checkCompactFlag() { |
| int lgK = 8; |
| //LIST: follows the toByteArray request |
| assertEquals(checkCompact(lgK, 7, HLL_8, false, false), false); |
| assertEquals(checkCompact(lgK, 7, HLL_8, false, true), true); |
| assertEquals(checkCompact(lgK, 7, HLL_8, false, false), false); |
| assertEquals(checkCompact(lgK, 7, HLL_8, false, true), true); |
| assertEquals(checkCompact(lgK, 7, HLL_8, true, false), false); |
| assertEquals(checkCompact(lgK, 7, HLL_8, true, true), true); |
| assertEquals(checkCompact(lgK, 7, HLL_8, true, false), false); |
| assertEquals(checkCompact(lgK, 7, HLL_8, true, true), true); |
| |
| //SET: follows the toByteArray request |
| assertEquals(checkCompact(lgK, 24, HLL_8, false, false), false); |
| assertEquals(checkCompact(lgK, 24, HLL_8, false, true), true); |
| assertEquals(checkCompact(lgK, 24, HLL_8, false, false), false); |
| assertEquals(checkCompact(lgK, 24, HLL_8, false, true), true); |
| assertEquals(checkCompact(lgK, 24, HLL_8, true, false), false); |
| assertEquals(checkCompact(lgK, 24, HLL_8, true, true), true); |
| assertEquals(checkCompact(lgK, 24, HLL_8, true, false), false); |
| assertEquals(checkCompact(lgK, 24, HLL_8, true, true), true); |
| |
| //HLL8: always updatable |
| assertEquals(checkCompact(lgK, 25, HLL_8, false, false), false); |
| assertEquals(checkCompact(lgK, 25, HLL_8, false, true), false); |
| assertEquals(checkCompact(lgK, 25, HLL_8, false, false), false); |
| assertEquals(checkCompact(lgK, 25, HLL_8, false, true), false); |
| assertEquals(checkCompact(lgK, 25, HLL_8, true, false), false); |
| assertEquals(checkCompact(lgK, 25, HLL_8, true, true), false); |
| assertEquals(checkCompact(lgK, 25, HLL_8, true, false), false); |
| assertEquals(checkCompact(lgK, 25, HLL_8, true, true), false); |
| |
| //HLL6: always updatable |
| assertEquals(checkCompact(lgK, 25, HLL_6, false, false), false); |
| assertEquals(checkCompact(lgK, 25, HLL_6, false, true), false); |
| assertEquals(checkCompact(lgK, 25, HLL_6, false, false), false); |
| assertEquals(checkCompact(lgK, 25, HLL_6, false, true), false); |
| assertEquals(checkCompact(lgK, 25, HLL_6, true, false), false); |
| assertEquals(checkCompact(lgK, 25, HLL_6, true, true), false); |
| assertEquals(checkCompact(lgK, 25, HLL_6, true, false), false); |
| assertEquals(checkCompact(lgK, 25, HLL_6, true, true), false); |
| |
| //HLL:4 follows the toByteArray request |
| assertEquals(checkCompact(lgK, 25, HLL_4, false, false), false); |
| assertEquals(checkCompact(lgK, 25, HLL_4, false, true), true); |
| assertEquals(checkCompact(lgK, 25, HLL_4, false, false), false); |
| assertEquals(checkCompact(lgK, 25, HLL_4, false, true), true); |
| assertEquals(checkCompact(lgK, 25, HLL_4, true, false), false); |
| assertEquals(checkCompact(lgK, 25, HLL_4, true, true), true); |
| assertEquals(checkCompact(lgK, 25, HLL_4, true, false), false); |
| assertEquals(checkCompact(lgK, 25, HLL_4, true, true), true); |
| } |
| |
| //Creates either a direct or heap sketch, |
| // Serializes to either compact or updatable form. |
| // Confirms the isMemory() for direct, isOffHeap(), and the |
| // get compact or updatable serialization bytes. |
| // Returns true if the compact flag is set. |
| private static boolean checkCompact(int lgK, int n, TgtHllType type, boolean direct, |
| boolean compact) { |
| int bytes = HllSketch.getMaxUpdatableSerializationBytes(lgK, type); |
| WritableMemory wmem = WritableMemory.allocate(bytes); |
| HllSketch sk = (direct) ? new HllSketch(lgK, type, wmem) : new HllSketch(lgK, type); |
| assertEquals(sk.isMemory(), direct); |
| assertFalse(sk.isOffHeap()); |
| for (int i = 0; i < n; i++) { sk.update(i); } //LOAD |
| byte[] byteArr = (compact) ? sk.toCompactByteArray() : sk.toUpdatableByteArray(); |
| int len = byteArr.length; |
| if (compact) { |
| assertEquals(len, sk.getCompactSerializationBytes()); |
| } else { |
| assertEquals(len, sk.getUpdatableSerializationBytes()); |
| } |
| HllSketch sk2 = HllSketch.wrap(Memory.wrap(byteArr)); |
| assertEquals(sk2.getEstimate(), n, .01); |
| boolean resourceCompact = sk2.isCompact(); |
| if (resourceCompact) { |
| try { |
| HllSketch.writableWrap(WritableMemory.writableWrap(byteArr)); |
| fail(); |
| } catch (SketchesArgumentException e) { |
| //OK |
| } |
| } |
| return resourceCompact; |
| //return (byteArr[5] & COMPACT_FLAG_MASK) > 0; |
| } |
| |
| @SuppressWarnings("unused") |
| @Test(expectedExceptions = SketchesArgumentException.class) |
| public void checkWritableWrapOfCompact() { |
| HllSketch sk = new HllSketch(); |
| byte[] byteArr = sk.toCompactByteArray(); |
| WritableMemory wmem = WritableMemory.writableWrap(byteArr); |
| HllSketch sk2 = HllSketch.writableWrap(wmem); |
| } |
| |
| @SuppressWarnings("unused") |
| @Test |
| public void checkJavadocExample() { |
| Union union; HllSketch sk, sk2; |
| int lgK = 12; |
| sk = new HllSketch(lgK, TgtHllType.HLL_4); //can be 4, 6, or 8 |
| for (int i = 0; i < (2 << lgK); i++) { sk.update(i); } |
| byte[] arr = sk.toCompactByteArray(); |
| // ... |
| union = Union.heapify(arr); //initializes the union using data from the array. |
| //OR, if used in an off-heap environment: |
| union = Union.heapify(Memory.wrap(arr)); |
| |
| //To recover an updatable Heap sketch: |
| sk2 = HllSketch.heapify(arr); |
| //OR, if used in an off-heap environment: |
| sk2 = HllSketch.heapify(Memory.wrap(arr)); |
| } |
| |
| @Test |
| public void printlnTest() { |
| println("PRINTING: "+this.getClass().getName()); |
| } |
| |
| /** |
| * @param s value to print |
| */ |
| static void print(String s) { |
| //System.out.print(s); //disable here |
| } |
| |
| /** |
| * @param s value to print |
| */ |
| static void println(String s) { |
| //System.out.println(s); //disable here |
| } |
| } |