Merge pull request #533 from romseygeek/sketch-bytebuffers-update
Allow updating sketches with ByteBuffers
diff --git a/src/main/java/org/apache/datasketches/cpc/CpcSketch.java b/src/main/java/org/apache/datasketches/cpc/CpcSketch.java
index 0e6202c..e94285c 100644
--- a/src/main/java/org/apache/datasketches/cpc/CpcSketch.java
+++ b/src/main/java/org/apache/datasketches/cpc/CpcSketch.java
@@ -29,6 +29,7 @@
import static org.apache.datasketches.cpc.CpcUtil.countBitsSetInMatrix;
import static org.apache.datasketches.hash.MurmurHash3.hash;
+import java.nio.ByteBuffer;
import java.util.Arrays;
import org.apache.datasketches.common.Family;
@@ -363,6 +364,18 @@
}
/**
+ * Present the given ByteBuffer as a potential unique item.
+ * If the ByteBuffer is null or empty no update attempt is made and the method returns.
+ * A buffer is treated as empty when {@link ByteBuffer#hasRemaining()} returns false.
+ *
+ * @param data The given ByteBuffer
+ */
+ public void update(final ByteBuffer data) {
+ if ((data == null) || !data.hasRemaining()) { return; }
+ final long[] arr = hash(data, seed);
+ hashUpdate(arr[0], arr[1]);
+ }
+
+ /**
* Present the given char array as a potential unique item.
* If the char array is null or empty no update attempt is made and the method returns.
*
diff --git a/src/main/java/org/apache/datasketches/theta/Union.java b/src/main/java/org/apache/datasketches/theta/Union.java
index 91aec32..96ea6d2 100644
--- a/src/main/java/org/apache/datasketches/theta/Union.java
+++ b/src/main/java/org/apache/datasketches/theta/Union.java
@@ -23,6 +23,8 @@
import org.apache.datasketches.memory.Memory;
import org.apache.datasketches.memory.WritableMemory;
+import java.nio.ByteBuffer;
+
/**
* Compute the union of two or more theta sketches.
* A new instance represents an empty set.
@@ -182,6 +184,17 @@
public abstract void update(byte[] data);
/**
+ * Update <i>this</i> union with the given ByteBuffer item.
+ * If the ByteBuffer is null or empty, no update attempt is made and the method returns.
+ *
+ * <p>Note: this is not a Sketch Union operation. This treats the given ByteBuffer as a data
+ * item.</p>
+ *
+ * @param data The given ByteBuffer.
+ */
+ public abstract void update(ByteBuffer data);
+
+ /**
* Update <i>this</i> union with the given integer array item.
* If the integer array is null or empty no update attempt is made and the method returns.
*
diff --git a/src/main/java/org/apache/datasketches/theta/UnionImpl.java b/src/main/java/org/apache/datasketches/theta/UnionImpl.java
index 5256dc1..8516ec0 100644
--- a/src/main/java/org/apache/datasketches/theta/UnionImpl.java
+++ b/src/main/java/org/apache/datasketches/theta/UnionImpl.java
@@ -47,6 +47,8 @@
import org.apache.datasketches.thetacommon.HashOperations;
import org.apache.datasketches.thetacommon.ThetaUtil;
+import java.nio.ByteBuffer;
+
/**
* Shared code for the HeapUnion and DirectUnion implementations.
*
@@ -481,6 +483,11 @@
}
@Override
+ public void update(final ByteBuffer data) {
+ gadget_.update(data); // the buffer is handed unchanged to gadget_ as a single data item
+ }
+
+ @Override
public void update(final char[] data) {
gadget_.update(data);
}
diff --git a/src/main/java/org/apache/datasketches/theta/UpdateSketch.java b/src/main/java/org/apache/datasketches/theta/UpdateSketch.java
index 5f093a6..02fb599 100644
--- a/src/main/java/org/apache/datasketches/theta/UpdateSketch.java
+++ b/src/main/java/org/apache/datasketches/theta/UpdateSketch.java
@@ -42,6 +42,7 @@
import static org.apache.datasketches.theta.PreambleUtil.getMemBytes;
import static org.apache.datasketches.theta.UpdateReturnState.RejectedNullOrEmpty;
+import java.nio.ByteBuffer;
import java.util.Objects;
import org.apache.datasketches.common.Family;
@@ -276,6 +277,21 @@
}
/**
+ * Present this sketch with the given ByteBuffer.
+ * If the ByteBuffer is null or empty, no update attempt is made and the method returns.
+ * A buffer is treated as empty when {@link ByteBuffer#hasRemaining()} returns false.
+ *
+ * @param buffer the input ByteBuffer
+ * @return
+ * <a href="{@docRoot}/resources/dictionary.html#updateReturnState">See Update Return State</a>
+ */
+ public UpdateReturnState update(final ByteBuffer buffer) {
+ if ((buffer == null) || !buffer.hasRemaining()) {
+ return RejectedNullOrEmpty;
+ }
+ // Only element [0] of the hash result is used; the unsigned shift clears the sign bit.
+ return hashUpdate(hash(buffer, getSeed())[0] >>> 1);
+ }
+
+ /**
* Present this sketch with the given char array.
* If the char array is null or empty no update attempt is made and the method returns.
*
diff --git a/src/main/java/org/apache/datasketches/tuple/UpdatableSketch.java b/src/main/java/org/apache/datasketches/tuple/UpdatableSketch.java
index 135ce6d..6a38666 100644
--- a/src/main/java/org/apache/datasketches/tuple/UpdatableSketch.java
+++ b/src/main/java/org/apache/datasketches/tuple/UpdatableSketch.java
@@ -23,6 +23,8 @@
import org.apache.datasketches.memory.Memory;
import org.apache.datasketches.thetacommon.ThetaUtil;
+import java.nio.ByteBuffer;
+
/**
* An extension of QuickSelectSketch<S>, which can be updated with many types of keys.
* Summary objects are created using a user-defined SummaryFactory class,
@@ -138,6 +140,18 @@
}
/**
+ * Updates this sketch with a ByteBuffer and U value.
+ * The value is passed to the update() method of the Summary object associated with the key.
+ * If the ByteBuffer is null or has no remaining bytes, the call is a no-op.
+ *
+ * @param buffer The given ByteBuffer key
+ * @param value The given U value
+ */
+ public void update(final ByteBuffer buffer, final U value) {
+ if ((buffer == null) || !buffer.hasRemaining()) { return; }
+ // Only element [0] of the hash result is used; the unsigned shift clears the sign bit.
+ insertOrIgnore(MurmurHash3.hash(buffer, ThetaUtil.DEFAULT_UPDATE_SEED)[0] >>> 1, value);
+ }
+
+ /**
* Updates this sketch with a int[] key and U value.
* The value is passed to update() method of the Summary object associated with the key
*
diff --git a/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/ArrayOfDoublesUpdatableSketch.java b/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/ArrayOfDoublesUpdatableSketch.java
index ee8878d..4febb11 100644
--- a/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/ArrayOfDoublesUpdatableSketch.java
+++ b/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/ArrayOfDoublesUpdatableSketch.java
@@ -26,6 +26,8 @@
import org.apache.datasketches.thetacommon.ThetaUtil;
import org.apache.datasketches.tuple.Util;
+import java.nio.ByteBuffer;
+
/**
* The top level for updatable tuple sketches of type ArrayOfDoubles.
*/
@@ -122,6 +124,18 @@
}
/**
+ * Updates this sketch with a ByteBuffer key and double values.
+ * The values will be stored or added to the ones associated with the key.
+ * If the key is null or has no remaining bytes, the call is a no-op.
+ *
+ * @param key The given ByteBuffer key
+ * @param values The given values
+ */
+ public void update(final ByteBuffer key, final double[] values) {
+ if ((key == null) || !key.hasRemaining()) { return; }
+ // Only element [0] of the hash result is used; the unsigned shift clears the sign bit.
+ insertOrIgnore(MurmurHash3.hash(key, seed_)[0] >>> 1, values);
+ }
+
+ /**
* Updates this sketch with a int[] key and double values.
* The values will be stored or added to the ones associated with the key
*
diff --git a/src/test/java/org/apache/datasketches/cpc/CpcSketchTest.java b/src/test/java/org/apache/datasketches/cpc/CpcSketchTest.java
index a62ffbd..1c03fb4 100644
--- a/src/test/java/org/apache/datasketches/cpc/CpcSketchTest.java
+++ b/src/test/java/org/apache/datasketches/cpc/CpcSketchTest.java
@@ -25,6 +25,7 @@
import static org.testng.Assert.fail;
import java.io.PrintStream;
+import java.nio.ByteBuffer;
import org.apache.datasketches.common.Family;
import org.apache.datasketches.common.SketchesArgumentException;
@@ -46,7 +47,10 @@
sk.update(1L);
sk.update(2.0);
sk.update("3");
- sk.update(new byte[] { 4 });
+ byte[] bytes = new byte[] { 4, 4 };
+ sk.update(bytes);
+ sk.update(ByteBuffer.wrap(bytes)); // same as previous
+ sk.update(ByteBuffer.wrap(bytes, 0, 1));
sk.update(new char[] { 5 });
sk.update(new int[] { 6 });
sk.update(new long[] { 7 });
@@ -106,6 +110,7 @@
est = (int) Math.round(sk.getEstimate());
assertEquals(est, 1);
barr = new byte[0];
+ sk.update(barr);
est = (int) Math.round(sk.getEstimate());
assertEquals(est, 1);
diff --git a/src/test/java/org/apache/datasketches/theta/DirectUnionTest.java b/src/test/java/org/apache/datasketches/theta/DirectUnionTest.java
index 02f0e6b..5506090 100644
--- a/src/test/java/org/apache/datasketches/theta/DirectUnionTest.java
+++ b/src/test/java/org/apache/datasketches/theta/DirectUnionTest.java
@@ -29,6 +29,7 @@
import static org.testng.Assert.assertFalse;
import static org.testng.Assert.assertTrue;
+import java.nio.ByteBuffer;
import java.util.Arrays;
import org.apache.datasketches.common.Family;
@@ -706,22 +707,24 @@
union.update(byteArr); //empty byte[]
byteArr = "Byte Array".getBytes(UTF_8);
union.update(byteArr); //#3 actual byte[]
+ union.update(ByteBuffer.wrap(byteArr)); // same as previous
+ union.update(ByteBuffer.wrap(byteArr, 0, 4)); // #4 byte slice
int[] intArr = null;
union.update(intArr); //null int[]
intArr = new int[0];
union.update(intArr); //empty int[]
final int[] intArr2 = { 1, 2, 3, 4, 5 };
- union.update(intArr2); //#4 actual int[]
+ union.update(intArr2); //#5 actual int[]
long[] longArr = null;
union.update(longArr); //null long[]
longArr = new long[0];
union.update(longArr); //empty long[]
final long[] longArr2 = { 6, 7, 8, 9 };
- union.update(longArr2); //#5 actual long[]
+ union.update(longArr2); //#6 actual long[]
final CompactSketch comp = union.getResult();
final double est = comp.getEstimate();
final boolean empty = comp.isEmpty();
- assertEquals(est, 7.0, 0.0);
+ assertEquals(est, 8.0, 0.0);
assertFalse(empty);
}
diff --git a/src/test/java/org/apache/datasketches/theta/HeapUnionTest.java b/src/test/java/org/apache/datasketches/theta/HeapUnionTest.java
index d9a37e3..c92b759 100644
--- a/src/test/java/org/apache/datasketches/theta/HeapUnionTest.java
+++ b/src/test/java/org/apache/datasketches/theta/HeapUnionTest.java
@@ -26,6 +26,7 @@
import static org.testng.Assert.assertEquals;
import static org.testng.Assert.assertFalse;
+import java.nio.ByteBuffer;
import java.util.Arrays;
import org.apache.datasketches.common.Family;
@@ -597,34 +598,37 @@
union.update(byteArr); //null byte[]
byteArr = new byte[0];
union.update(byteArr); //empty byte[]
+ union.update(ByteBuffer.wrap(byteArr)); // empty ByteBuffer
byteArr = "Byte Array".getBytes(UTF_8);
union.update(byteArr); //#5 actual byte[]
+ union.update(ByteBuffer.wrap(byteArr)); // same as previous
+ union.update(ByteBuffer.wrap(byteArr, 0, 4)); // #6 byte slice
char[] charArr = null;
union.update(charArr); //null char[]
charArr = new char[0];
union.update(charArr); //empty char[]
charArr = "String".toCharArray();
- union.update(charArr); //#6 actual char[]
+ union.update(charArr); //#7 actual char[]
int[] intArr = null;
union.update(intArr); //null int[]
intArr = new int[0];
union.update(intArr); //empty int[]
final int[] intArr2 = { 1, 2, 3, 4, 5 };
- union.update(intArr2); //#7 actual int[]
+ union.update(intArr2); //#8 actual int[]
long[] longArr = null;
union.update(longArr); //null long[]
longArr = new long[0];
union.update(longArr); //empty long[]
final long[] longArr2 = { 6, 7, 8, 9 };
- union.update(longArr2); //#8 actual long[]
+ union.update(longArr2); //#9 actual long[]
final CompactSketch comp = union.getResult();
final double est = comp.getEstimate();
final boolean empty = comp.isEmpty();
- assertEquals(est, 8.0, 0.0);
+ assertEquals(est, 9.0, 0.0);
assertFalse(empty);
}
diff --git a/src/test/java/org/apache/datasketches/theta/UpdateSketchTest.java b/src/test/java/org/apache/datasketches/theta/UpdateSketchTest.java
index c92840b..331d6e2 100644
--- a/src/test/java/org/apache/datasketches/theta/UpdateSketchTest.java
+++ b/src/test/java/org/apache/datasketches/theta/UpdateSketchTest.java
@@ -34,11 +34,15 @@
import org.apache.datasketches.common.ResizeFactor;
import org.apache.datasketches.common.SketchesArgumentException;
import org.apache.datasketches.memory.DefaultMemoryRequestServer;
+import org.apache.datasketches.memory.Memory;
import org.apache.datasketches.memory.MemoryRequestServer;
import org.apache.datasketches.memory.WritableMemory;
import org.apache.datasketches.thetacommon.ThetaUtil;
import org.testng.annotations.Test;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+
/**
* @author Lee Rhodes
*/
@@ -63,32 +67,35 @@
sk1.update(byteArr); //null byte[]
byteArr = new byte[0];
sk1.update(byteArr); //empty byte[]
+ sk1.update(ByteBuffer.wrap(byteArr)); // empty ByteBuffer
byteArr = "Byte Array".getBytes(UTF_8);
sk1.update(byteArr); //#5 actual byte[]
+ sk1.update(ByteBuffer.wrap(byteArr, 0, 10)); // whole byte array
+ sk1.update(ByteBuffer.wrap(byteArr, 2, 6)); // #6 byte array slice
char[] charArr = null;
sk1.update(charArr); //null char[]
charArr = new char[0];
sk1.update(charArr); //empty char[]
charArr = "String".toCharArray();
- sk1.update(charArr); //#6 actual char[]
+ sk1.update(charArr); //#7 actual char[]
int[] intArr = null;
sk1.update(intArr); //null int[]
intArr = new int[0];
sk1.update(intArr); //empty int[]
int[] intArr2 = { 1, 2, 3, 4, 5 };
- sk1.update(intArr2); //#7 actual int[]
+ sk1.update(intArr2); //#8 actual int[]
long[] longArr = null;
sk1.update(longArr); //null long[]
longArr = new long[0];
sk1.update(longArr); //empty long[]
long[] longArr2 = { 6, 7, 8, 9 };
- sk1.update(longArr2); //#8 actual long[]
+ sk1.update(longArr2); //#9 actual long[]
double est = sk1.getEstimate();
- assertEquals(est, 8.0, 0.0);
+ assertEquals(est, 9.0, 0.0);
}
@Test
diff --git a/src/test/java/org/apache/datasketches/tuple/adouble/AdoubleTest.java b/src/test/java/org/apache/datasketches/tuple/adouble/AdoubleTest.java
index 8709bb0..a853b6d 100644
--- a/src/test/java/org/apache/datasketches/tuple/adouble/AdoubleTest.java
+++ b/src/test/java/org/apache/datasketches/tuple/adouble/AdoubleTest.java
@@ -33,6 +33,9 @@
import org.testng.Assert;
import org.testng.annotations.Test;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+
public class AdoubleTest {
private final DoubleSummary.Mode mode = Mode.Sum;
@@ -216,14 +219,16 @@
new UpdatableSketchBuilder<>(new DoubleSummaryFactory(mode)).build();
sketch.update(1L, 1.0);
sketch.update(2.0, 1.0);
- final byte[] bytes = { 3 };
+ final byte[] bytes = { 3, 3 };
sketch.update(bytes, 1.0);
+ sketch.update(ByteBuffer.wrap(bytes), 1.0); // same as previous
+ sketch.update(ByteBuffer.wrap(bytes, 0, 1), 1.0); // slice of previous
final int[] ints = { 4 };
sketch.update(ints, 1.0);
final long[] longs = { 5L };
sketch.update(longs, 1.0);
sketch.update("a", 1.0);
- Assert.assertEquals(sketch.getEstimate(), 6.0);
+ Assert.assertEquals(sketch.getEstimate(), 7.0);
}
@Test
diff --git a/src/test/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesQuickSelectSketchTest.java b/src/test/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesQuickSelectSketchTest.java
index 86ee785..dfb0c1f 100644
--- a/src/test/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesQuickSelectSketchTest.java
+++ b/src/test/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesQuickSelectSketchTest.java
@@ -21,10 +21,14 @@
import org.apache.datasketches.common.ResizeFactor;
import org.apache.datasketches.common.SketchesArgumentException;
+import org.apache.datasketches.memory.Memory;
import org.apache.datasketches.memory.WritableMemory;
import org.testng.Assert;
import org.testng.annotations.Test;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+
public class DirectArrayOfDoublesQuickSelectSketchTest {
@Test
public void isEmpty() {
@@ -178,11 +182,14 @@
build(WritableMemory.writableWrap(new byte[1000000]));
sketch.update(1L, new double[] {1.0});
sketch.update(2.0, new double[] {1.0});
- sketch.update(new byte[] {3}, new double[] {1.0});
+ final byte[] bytes = new byte[] {3, 4};
+ sketch.update(bytes, new double[] {1.0});
+ sketch.update(ByteBuffer.wrap(bytes), new double[] {1.0}); // same as previous
+ sketch.update(ByteBuffer.wrap(bytes, 0, 1), new double[] {1.0}); // slice
sketch.update(new int[] {4}, new double[] {1.0});
sketch.update(new long[] {5L}, new double[] {1.0});
sketch.update("a", new double[] {1.0});
- Assert.assertEquals(sketch.getEstimate(), 6.0);
+ Assert.assertEquals(sketch.getEstimate(), 7.0);
}
@Test