| /* |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, |
| * software distributed under the License is distributed on an |
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| * KIND, either express or implied. See the License for the |
| * specific language governing permissions and limitations |
| * under the License. |
| */ |
| |
| package org.apache.datasketches.frequencies; |
| |
| import static org.apache.datasketches.frequencies.PreambleUtil.FAMILY_BYTE; |
| import static org.apache.datasketches.frequencies.PreambleUtil.FLAGS_BYTE; |
| import static org.apache.datasketches.frequencies.PreambleUtil.PREAMBLE_LONGS_BYTE; |
| import static org.apache.datasketches.frequencies.PreambleUtil.SER_VER_BYTE; |
| import static org.apache.datasketches.frequencies.Util.LG_MIN_MAP_SIZE; |
| import static org.testng.Assert.assertEquals; |
| import static org.testng.Assert.assertFalse; |
| import static org.testng.Assert.assertNotNull; |
| import static org.testng.Assert.assertTrue; |
| import static org.testng.Assert.fail; |
| |
| import org.testng.Assert; |
| import org.testng.annotations.Test; |
| |
| import org.apache.datasketches.memory.Memory; |
| import org.apache.datasketches.memory.WritableMemory; |
| import org.apache.datasketches.ArrayOfLongsSerDe; |
| import org.apache.datasketches.ArrayOfStringsSerDe; |
| import org.apache.datasketches.ArrayOfUtf16StringsSerDe; |
| import org.apache.datasketches.SketchesArgumentException; |
| import org.apache.datasketches.frequencies.ItemsSketch.Row; |
| |
| @SuppressWarnings("javadoc") |
| public class ItemsSketchTest { |
| |
| @Test |
| public void empty() { |
| ItemsSketch<String> sketch = new ItemsSketch<>(1 << LG_MIN_MAP_SIZE); |
| Assert.assertTrue(sketch.isEmpty()); |
| Assert.assertEquals(sketch.getNumActiveItems(), 0); |
| Assert.assertEquals(sketch.getStreamLength(), 0); |
| Assert.assertEquals(sketch.getLowerBound("a"), 0); |
| Assert.assertEquals(sketch.getUpperBound("a"), 0); |
| } |
| |
| @Test |
| public void nullInput() { |
| ItemsSketch<String> sketch = new ItemsSketch<>(1 << LG_MIN_MAP_SIZE); |
| sketch.update(null); |
| Assert.assertTrue(sketch.isEmpty()); |
| Assert.assertEquals(sketch.getNumActiveItems(), 0); |
| Assert.assertEquals(sketch.getStreamLength(), 0); |
| Assert.assertEquals(sketch.getLowerBound(null), 0); |
| Assert.assertEquals(sketch.getUpperBound(null), 0); |
| } |
| |
| @Test |
| public void oneItem() { |
| ItemsSketch<String> sketch = new ItemsSketch<>(1 << LG_MIN_MAP_SIZE); |
| sketch.update("a"); |
| Assert.assertFalse(sketch.isEmpty()); |
| Assert.assertEquals(sketch.getNumActiveItems(), 1); |
| Assert.assertEquals(sketch.getStreamLength(), 1); |
| Assert.assertEquals(sketch.getEstimate("a"), 1); |
| Assert.assertEquals(sketch.getLowerBound("a"), 1); |
| } |
| |
| @Test |
| public void severalItems() { |
| ItemsSketch<String> sketch = new ItemsSketch<>(1 << LG_MIN_MAP_SIZE); |
| sketch.update("a"); |
| sketch.update("b"); |
| sketch.update("c"); |
| sketch.update("d"); |
| sketch.update("b"); |
| sketch.update("c"); |
| sketch.update("b"); |
| Assert.assertFalse(sketch.isEmpty()); |
| Assert.assertEquals(sketch.getNumActiveItems(), 4); |
| Assert.assertEquals(sketch.getStreamLength(), 7); |
| Assert.assertEquals(sketch.getEstimate("a"), 1); |
| Assert.assertEquals(sketch.getEstimate("b"), 3); |
| Assert.assertEquals(sketch.getEstimate("c"), 2); |
| Assert.assertEquals(sketch.getEstimate("d"), 1); |
| |
| ItemsSketch.Row<String>[] items = sketch.getFrequentItems(ErrorType.NO_FALSE_POSITIVES); |
| Assert.assertEquals(items.length, 4); |
| |
| items = sketch.getFrequentItems(3, ErrorType.NO_FALSE_POSITIVES); |
| Assert.assertEquals(items.length, 1); |
| Assert.assertEquals(items[0].getItem(), "b"); |
| |
| sketch.reset(); |
| Assert.assertTrue(sketch.isEmpty()); |
| Assert.assertEquals(sketch.getNumActiveItems(), 0); |
| Assert.assertEquals(sketch.getStreamLength(), 0); |
| } |
| |
| @Test |
| public void estimationMode() { |
| ItemsSketch<Integer> sketch = new ItemsSketch<>(1 << LG_MIN_MAP_SIZE); |
| sketch.update(1, 10); |
| sketch.update(2); |
| sketch.update(3); |
| sketch.update(4); |
| sketch.update(5); |
| sketch.update(6); |
| sketch.update(7, 15); |
| sketch.update(8); |
| sketch.update(9); |
| sketch.update(10); |
| sketch.update(11); |
| sketch.update(12); |
| |
| Assert.assertFalse(sketch.isEmpty()); |
| Assert.assertEquals(sketch.getStreamLength(), 35); |
| |
| { |
| ItemsSketch.Row<Integer>[] items = |
| sketch.getFrequentItems(ErrorType.NO_FALSE_POSITIVES); |
| Assert.assertEquals(items.length, 2); |
| // only 2 items (1 and 7) should have counts more than 1 |
| int count = 0; |
| for (ItemsSketch.Row<Integer> item: items) { |
| if (item.getLowerBound() > 1) { |
| count++; |
| } |
| } |
| Assert.assertEquals(count, 2); |
| } |
| |
| { |
| ItemsSketch.Row<Integer>[] items = |
| sketch.getFrequentItems(ErrorType.NO_FALSE_NEGATIVES); |
| Assert.assertTrue(items.length >= 2); |
| // only 2 items (1 and 7) should have counts more than 1 |
| int count = 0; |
| for (ItemsSketch.Row<Integer> item: items) { |
| if (item.getLowerBound() > 5) { |
| count++; |
| } |
| } |
| Assert.assertEquals(count, 2); |
| } |
| } |
| |
| @Test |
| public void serializeStringDeserializeEmpty() { |
| ItemsSketch<String> sketch1 = new ItemsSketch<>(1 << LG_MIN_MAP_SIZE); |
| byte[] bytes = sketch1.toByteArray(new ArrayOfStringsSerDe()); |
| ItemsSketch<String> sketch2 = |
| ItemsSketch.getInstance(Memory.wrap(bytes), new ArrayOfStringsSerDe()); |
| Assert.assertTrue(sketch2.isEmpty()); |
| Assert.assertEquals(sketch2.getNumActiveItems(), 0); |
| Assert.assertEquals(sketch2.getStreamLength(), 0); |
| } |
| |
| @Test |
| public void serializeDeserializeUft8Strings() { |
| ItemsSketch<String> sketch1 = new ItemsSketch<>(1 << LG_MIN_MAP_SIZE); |
| sketch1.update("aaaaaaaaaaaaaaaaaaaaaaaaaaaaa"); |
| sketch1.update("bbbbbbbbbbbbbbbbbbbbbbbbbbbbb"); |
| sketch1.update("ccccccccccccccccccccccccccccc"); |
| sketch1.update("ddddddddddddddddddddddddddddd"); |
| |
| byte[] bytes = sketch1.toByteArray(new ArrayOfStringsSerDe()); |
| ItemsSketch<String> sketch2 = |
| ItemsSketch.getInstance(Memory.wrap(bytes), new ArrayOfStringsSerDe()); |
| sketch2.update("bbbbbbbbbbbbbbbbbbbbbbbbbbbbb"); |
| sketch2.update("ccccccccccccccccccccccccccccc"); |
| sketch2.update("bbbbbbbbbbbbbbbbbbbbbbbbbbbbb"); |
| |
| Assert.assertFalse(sketch2.isEmpty()); |
| Assert.assertEquals(sketch2.getNumActiveItems(), 4); |
| Assert.assertEquals(sketch2.getStreamLength(), 7); |
| Assert.assertEquals(sketch2.getEstimate("aaaaaaaaaaaaaaaaaaaaaaaaaaaaa"), 1); |
| Assert.assertEquals(sketch2.getEstimate("bbbbbbbbbbbbbbbbbbbbbbbbbbbbb"), 3); |
| Assert.assertEquals(sketch2.getEstimate("ccccccccccccccccccccccccccccc"), 2); |
| Assert.assertEquals(sketch2.getEstimate("ddddddddddddddddddddddddddddd"), 1); |
| } |
| |
| @Test |
| public void serializeDeserializeUtf16Strings() { |
| ItemsSketch<String> sketch1 = new ItemsSketch<>(1 << LG_MIN_MAP_SIZE); |
| sketch1.update("aaaaaaaaaaaaaaaaaaaaaaaaaaaaa"); |
| sketch1.update("bbbbbbbbbbbbbbbbbbbbbbbbbbbbb"); |
| sketch1.update("ccccccccccccccccccccccccccccc"); |
| sketch1.update("ddddddddddddddddddddddddddddd"); |
| |
| byte[] bytes = sketch1.toByteArray(new ArrayOfUtf16StringsSerDe()); |
| ItemsSketch<String> sketch2 = |
| ItemsSketch.getInstance(Memory.wrap(bytes), new ArrayOfUtf16StringsSerDe()); |
| sketch2.update("bbbbbbbbbbbbbbbbbbbbbbbbbbbbb"); |
| sketch2.update("ccccccccccccccccccccccccccccc"); |
| sketch2.update("bbbbbbbbbbbbbbbbbbbbbbbbbbbbb"); |
| |
| Assert.assertFalse(sketch2.isEmpty()); |
| Assert.assertEquals(sketch2.getNumActiveItems(), 4); |
| Assert.assertEquals(sketch2.getStreamLength(), 7); |
| Assert.assertEquals(sketch2.getEstimate("aaaaaaaaaaaaaaaaaaaaaaaaaaaaa"), 1); |
| Assert.assertEquals(sketch2.getEstimate("bbbbbbbbbbbbbbbbbbbbbbbbbbbbb"), 3); |
| Assert.assertEquals(sketch2.getEstimate("ccccccccccccccccccccccccccccc"), 2); |
| Assert.assertEquals(sketch2.getEstimate("ddddddddddddddddddddddddddddd"), 1); |
| } |
| |
| @Test |
| public void forceResize() { |
| ItemsSketch<String> sketch1 = new ItemsSketch<>(2 << LG_MIN_MAP_SIZE); |
| for (int i=0; i<32; i++) { |
| sketch1.update(Integer.toString(i), i*i); |
| } |
| } |
| |
| @Test |
| public void getRowHeader() { |
| String header = ItemsSketch.Row.getRowHeader(); |
| Assert.assertNotNull(header); |
| Assert.assertTrue(header.length() > 0); |
| } |
| |
| @Test |
| public void serializeLongDeserialize() { |
| ItemsSketch<Long> sketch1 = new ItemsSketch<>(1 << LG_MIN_MAP_SIZE); |
| sketch1.update(1L); |
| sketch1.update(2L); |
| sketch1.update(3L); |
| sketch1.update(4L); |
| |
| String s = sketch1.toString(); |
| println(s); |
| |
| byte[] bytes = sketch1.toByteArray(new ArrayOfLongsSerDe()); |
| ItemsSketch<Long> sketch2 = |
| ItemsSketch.getInstance(Memory.wrap(bytes), new ArrayOfLongsSerDe()); |
| sketch2.update(2L); |
| sketch2.update(3L); |
| sketch2.update(2L); |
| |
| Assert.assertFalse(sketch2.isEmpty()); |
| Assert.assertEquals(sketch2.getNumActiveItems(), 4); |
| Assert.assertEquals(sketch2.getStreamLength(), 7); |
| Assert.assertEquals(sketch2.getEstimate(1L), 1); |
| Assert.assertEquals(sketch2.getEstimate(2L), 3); |
| Assert.assertEquals(sketch2.getEstimate(3L), 2); |
| Assert.assertEquals(sketch2.getEstimate(4L), 1); |
| } |
| |
| @Test |
| public void mergeExact() { |
| ItemsSketch<String> sketch1 = new ItemsSketch<>(1 << LG_MIN_MAP_SIZE); |
| sketch1.update("a"); |
| sketch1.update("b"); |
| sketch1.update("c"); |
| sketch1.update("d"); |
| |
| ItemsSketch<String> sketch2 = new ItemsSketch<>(1 << LG_MIN_MAP_SIZE); |
| sketch2.update("b"); |
| sketch2.update("c"); |
| sketch2.update("b"); |
| |
| sketch1.merge(sketch2); |
| Assert.assertFalse(sketch1.isEmpty()); |
| Assert.assertEquals(sketch1.getNumActiveItems(), 4); |
| Assert.assertEquals(sketch1.getStreamLength(), 7); |
| Assert.assertEquals(sketch1.getEstimate("a"), 1); |
| Assert.assertEquals(sketch1.getEstimate("b"), 3); |
| Assert.assertEquals(sketch1.getEstimate("c"), 2); |
| Assert.assertEquals(sketch1.getEstimate("d"), 1); |
| } |
| |
| @Test |
| public void checkNullMapReturns() { |
| ReversePurgeItemHashMap<Long> map = new ReversePurgeItemHashMap<>(1 << LG_MIN_MAP_SIZE); |
| Assert.assertNull(map.getActiveKeys()); |
| Assert.assertNull(map.getActiveValues()); |
| } |
| |
| @SuppressWarnings("unlikely-arg-type") |
| @Test |
| public void checkMisc() { |
| ItemsSketch<Long> sk1 = new ItemsSketch<>(1 << LG_MIN_MAP_SIZE); |
| Assert.assertEquals(sk1.getCurrentMapCapacity(), 6); |
| Assert.assertEquals(sk1.getEstimate(Long.valueOf(1)), 0); |
| ItemsSketch<Long> sk2 = new ItemsSketch<>(8); |
| Assert.assertEquals(sk1.merge(sk2), sk1 ); |
| Assert.assertEquals(sk1.merge(null), sk1); |
| sk1.update(Long.valueOf(1)); |
| ItemsSketch.Row<Long>[] rows = sk1.getFrequentItems(ErrorType.NO_FALSE_NEGATIVES); |
| ItemsSketch.Row<Long> row = rows[0]; |
| Assert.assertTrue(row.hashCode() != 0); |
| Assert.assertTrue(row.equals(row)); |
| Assert.assertFalse(row.equals(sk1)); |
| Assert.assertEquals((long)row.getItem(), 1L); |
| Assert.assertEquals(row.getEstimate(), 1); |
| Assert.assertEquals(row.getUpperBound(), 1); |
| String s = row.toString(); |
| println(s); |
| ItemsSketch.Row<Long> nullRow = null; //check equals(null) |
| Assert.assertFalse(row.equals(nullRow)); |
| } |
| |
| @Test |
| public void checkToString() { |
| ItemsSketch<Long> sk = new ItemsSketch<>(1 << LG_MIN_MAP_SIZE); |
| sk.update(Long.valueOf(1)); |
| println(ItemsSketch.toString(sk.toByteArray(new ArrayOfLongsSerDe()))); |
| } |
| |
| @Test |
| public void checkGetFrequentItems1() { |
| ItemsSketch<Long> fis = new ItemsSketch<>(1 << LG_MIN_MAP_SIZE); |
| fis.update(1L); |
| Row<Long>[] rowArr = fis.getFrequentItems(ErrorType.NO_FALSE_POSITIVES); |
| Row<Long> row = rowArr[0]; |
| assertNotNull(row); |
| assertEquals(row.est, 1L); |
| assertEquals(row.item, Long.valueOf(1L)); |
| assertEquals(row.lb, 1L); |
| assertEquals(row.ub, 1L); |
| Row<Long> newRow = new Row<>(row.item, row.est+1, row.ub, row.lb); |
| assertFalse(row.equals(newRow)); |
| newRow = new Row<>(row.item, row.est, row.ub, row.lb); |
| assertTrue(row.equals(newRow)); |
| } |
| |
| @Test(expectedExceptions = SketchesArgumentException.class) |
| public void checkUpdateException() { |
| ItemsSketch<Long> sk1 = new ItemsSketch<>(1 << LG_MIN_MAP_SIZE); |
| sk1.update(Long.valueOf(1), -1); |
| } |
| |
| @Test |
| public void checkMemExceptions() { |
| ItemsSketch<Long> sk1 = new ItemsSketch<>(1 << LG_MIN_MAP_SIZE); |
| sk1.update(Long.valueOf(1), 1); |
| ArrayOfLongsSerDe serDe = new ArrayOfLongsSerDe(); |
| byte[] byteArr = sk1.toByteArray(serDe); |
| WritableMemory mem = WritableMemory.writableWrap(byteArr); |
| //FrequentItemsSketch<Long> sk2 = FrequentItemsSketch.getInstance(mem, serDe); |
| //println(sk2.toString()); |
| long pre0 = mem.getLong(0); //The correct first 8 bytes. |
| //Now start corrupting |
| tryBadMem(mem, PREAMBLE_LONGS_BYTE, 2); //Corrupt |
| mem.putLong(0, pre0); //restore |
| |
| tryBadMem(mem, SER_VER_BYTE, 2); //Corrupt |
| mem.putLong(0, pre0); //restore |
| |
| tryBadMem(mem, FAMILY_BYTE, 2); //Corrupt |
| mem.putLong(0, pre0); //restore |
| |
| tryBadMem(mem, FLAGS_BYTE, 4); //Corrupt to true |
| mem.putLong(0, pre0); //restore |
| } |
| |
| @Test |
| public void oneItemUtf8() { |
| ItemsSketch<String> sketch1 = new ItemsSketch<>(1 << LG_MIN_MAP_SIZE); |
| sketch1.update("\u5fb5"); |
| Assert.assertFalse(sketch1.isEmpty()); |
| Assert.assertEquals(sketch1.getNumActiveItems(), 1); |
| Assert.assertEquals(sketch1.getStreamLength(), 1); |
| Assert.assertEquals(sketch1.getEstimate("\u5fb5"), 1); |
| |
| byte[] bytes = sketch1.toByteArray(new ArrayOfStringsSerDe()); |
| ItemsSketch<String> sketch2 = |
| ItemsSketch.getInstance(Memory.wrap(bytes), new ArrayOfStringsSerDe()); |
| Assert.assertFalse(sketch2.isEmpty()); |
| Assert.assertEquals(sketch2.getNumActiveItems(), 1); |
| Assert.assertEquals(sketch2.getStreamLength(), 1); |
| Assert.assertEquals(sketch2.getEstimate("\u5fb5"), 1); |
| } |
| |
| @Test |
| public void checkGetEpsilon() { |
| assertEquals(ItemsSketch.getEpsilon(1024), 3.5 / 1024, 0.0); |
| try { |
| ItemsSketch.getEpsilon(1000); |
| } catch (SketchesArgumentException e) { } |
| } |
| |
| @Test |
| public void checkGetAprioriError() { |
| double eps = 3.5 / 1024; |
| assertEquals(ItemsSketch.getAprioriError(1024, 10_000), eps * 10_000); |
| } |
| |
| @Test |
| public void printlnTest() { |
| println("PRINTING: " + this.getClass().getName()); |
| } |
| |
| //Restricted methods |
| |
| private static void tryBadMem(WritableMemory mem, int byteOffset, int byteValue) { |
| ArrayOfLongsSerDe serDe = new ArrayOfLongsSerDe(); |
| try { |
| mem.putByte(byteOffset, (byte) byteValue); //Corrupt |
| ItemsSketch.getInstance(mem, serDe); |
| fail(); |
| } catch (SketchesArgumentException e) { |
| //expected |
| } |
| } |
| |
| |
| /** |
| * @param s value to print |
| */ |
| static void println(String s) { |
| //System.out.println(s); //disable here |
| } |
| } |