Merge pull request #37 from DataSketches/kll-cdf
GetCdfFromSketchUDF
diff --git a/src/main/java/com/yahoo/sketches/hive/kll/GetNFromSketchUDF.java b/src/main/java/com/yahoo/sketches/hive/kll/GetNFromSketchUDF.java
new file mode 100644
index 0000000..e708d58
--- /dev/null
+++ b/src/main/java/com/yahoo/sketches/hive/kll/GetNFromSketchUDF.java
@@ -0,0 +1,30 @@
+/*
+ * Copyright 2019, Verizon Media.
+ * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
+ */
+
+package com.yahoo.sketches.hive.kll;
+
+import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.ql.exec.UDF;
+import org.apache.hadoop.io.BytesWritable;
+
+import com.yahoo.memory.Memory;
+import com.yahoo.sketches.kll.KllFloatsSketch;
+
+@Description(name = "GetN", value = "_FUNC_(sketch)",
+extended = " Returns the total number of observed input values (stream length) from a given KllFloatsSketch.")
+public class GetNFromSketchUDF extends UDF {
+
+  /**
+   * Returns N from a given sketch. Only the valid bytes of the writable are read (its buffer may be padded).
+   * @param serializedSketch serialized KllFloatsSketch, may be null
+   * @return stream length, or null if the given sketch is null
+   */
+  public Long evaluate(final BytesWritable serializedSketch) {
+    if (serializedSketch == null) { return null; }
+    final KllFloatsSketch sketch = KllFloatsSketch.heapify(Memory.wrap(serializedSketch.copyBytes()));
+    return sketch.getN();
+  }
+
+}
diff --git a/src/main/java/com/yahoo/sketches/hive/quantiles/GetNFromDoublesSketchUDF.java b/src/main/java/com/yahoo/sketches/hive/quantiles/GetNFromDoublesSketchUDF.java
new file mode 100644
index 0000000..93b0b69
--- /dev/null
+++ b/src/main/java/com/yahoo/sketches/hive/quantiles/GetNFromDoublesSketchUDF.java
@@ -0,0 +1,30 @@
+/*
+ * Copyright 2019, Verizon Media.
+ * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
+ */
+
+package com.yahoo.sketches.hive.quantiles;
+
+import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.ql.exec.UDF;
+import org.apache.hadoop.io.BytesWritable;
+
+import com.yahoo.memory.Memory;
+import com.yahoo.sketches.quantiles.DoublesSketch;
+
+@Description(name = "GetN", value = "_FUNC_(sketch)",
+extended = " Returns the total number of observed input values (stream length) from a given DoublesSketch.")
+public class GetNFromDoublesSketchUDF extends UDF {
+
+  /**
+   * Returns N from a given sketch. Only the valid bytes of the writable are read (its buffer may be padded).
+   * @param serializedSketch serialized DoublesSketch, may be null
+   * @return stream length, or null if the given sketch is null
+   */
+  public Long evaluate(final BytesWritable serializedSketch) {
+    if (serializedSketch == null) { return null; }
+    final DoublesSketch sketch = DoublesSketch.wrap(Memory.wrap(serializedSketch.copyBytes()));
+    return sketch.getN();
+  }
+
+}
diff --git a/src/main/java/com/yahoo/sketches/hive/quantiles/GetNFromStringsSketchUDF.java b/src/main/java/com/yahoo/sketches/hive/quantiles/GetNFromStringsSketchUDF.java
new file mode 100644
index 0000000..67d1772
--- /dev/null
+++ b/src/main/java/com/yahoo/sketches/hive/quantiles/GetNFromStringsSketchUDF.java
@@ -0,0 +1,37 @@
+/*
+ * Copyright 2019, Verizon Media.
+ * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
+ */
+
+package com.yahoo.sketches.hive.quantiles;
+
+import java.util.Comparator;
+
+import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.ql.exec.UDF;
+import org.apache.hadoop.io.BytesWritable;
+
+import com.yahoo.memory.Memory;
+import com.yahoo.sketches.ArrayOfStringsSerDe;
+import com.yahoo.sketches.quantiles.ItemsSketch;
+
+@Description(name = "GetN", value = "_FUNC_(sketch)",
+extended = " Returns the total number of observed input values (stream length) from a given ItemsSketch<String>.")
+public class GetNFromStringsSketchUDF extends UDF {
+
+  /**
+   * Returns N from a given sketch. Only the valid bytes of the writable are read (its buffer may be padded).
+   * @param serializedSketch serialized ItemsSketch of strings, may be null
+   * @return stream length, or null if the given sketch is null
+   */
+  public Long evaluate(final BytesWritable serializedSketch) {
+    if (serializedSketch == null) { return null; }
+    final ItemsSketch<String> sketch = ItemsSketch.getInstance(
+      Memory.wrap(serializedSketch.copyBytes()),
+      Comparator.naturalOrder(),
+      new ArrayOfStringsSerDe()
+    );
+    return sketch.getN();
+  }
+
+}
diff --git a/src/test/java/com/yahoo/sketches/hive/kll/GetNFromSektchUDFTest.java b/src/test/java/com/yahoo/sketches/hive/kll/GetNFromSektchUDFTest.java
new file mode 100644
index 0000000..c0573f9
--- /dev/null
+++ b/src/test/java/com/yahoo/sketches/hive/kll/GetNFromSektchUDFTest.java
@@ -0,0 +1,34 @@
+/*
+ * Copyright 2019, Verizon Media.
+ * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
+ */
+
+package com.yahoo.sketches.hive.kll;
+
+import org.apache.hadoop.io.BytesWritable;
+import org.testng.Assert;
+import org.testng.annotations.Test;
+
+import com.yahoo.sketches.kll.KllFloatsSketch;
+
+public class GetNFromSektchUDFTest {
+
+  @Test // a null sketch must yield a null result
+  public void nullSketch() {
+    final GetNFromSketchUDF udf = new GetNFromSketchUDF();
+    Assert.assertNull(udf.evaluate(null));
+  }
+
+  @Test // four updates must be reported as N = 4
+  public void normalCase() {
+    final KllFloatsSketch input = new KllFloatsSketch();
+    for (int value = 1; value <= 4; value++) {
+      input.update(value);
+    }
+    final BytesWritable serialized = new BytesWritable(input.toByteArray());
+    final Long n = new GetNFromSketchUDF().evaluate(serialized);
+    Assert.assertNotNull(n);
+    Assert.assertEquals(n, Long.valueOf(4));
+  }
+
+}
diff --git a/src/test/java/com/yahoo/sketches/hive/quantiles/GetNFromDoublesSektchUDFTest.java b/src/test/java/com/yahoo/sketches/hive/quantiles/GetNFromDoublesSektchUDFTest.java
new file mode 100644
index 0000000..4cb0fcd
--- /dev/null
+++ b/src/test/java/com/yahoo/sketches/hive/quantiles/GetNFromDoublesSektchUDFTest.java
@@ -0,0 +1,34 @@
+/*
+ * Copyright 2019, Verizon Media.
+ * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
+ */
+
+package com.yahoo.sketches.hive.quantiles;
+
+import com.yahoo.sketches.quantiles.DoublesSketch;
+import com.yahoo.sketches.quantiles.UpdateDoublesSketch;
+
+import org.testng.annotations.Test;
+import org.apache.hadoop.io.BytesWritable;
+import org.testng.Assert;
+
+public class GetNFromDoublesSektchUDFTest {
+
+  @Test // a null sketch must yield a null result
+  public void nullSketch() {
+    final GetNFromDoublesSketchUDF udf = new GetNFromDoublesSketchUDF();
+    Assert.assertNull(udf.evaluate(null));
+  }
+
+  @Test // three updates must be reported as N = 3
+  public void normalCase() {
+    final UpdateDoublesSketch input = DoublesSketch.builder().build();
+    for (int value = 1; value <= 3; value++) {
+      input.update(value);
+    }
+    final Long n = new GetNFromDoublesSketchUDF().evaluate(new BytesWritable(input.toByteArray()));
+    Assert.assertNotNull(n);
+    Assert.assertEquals(n, Long.valueOf(3));
+  }
+
+}
diff --git a/src/test/java/com/yahoo/sketches/hive/quantiles/GetNFromStringsSketchUDFTest.java b/src/test/java/com/yahoo/sketches/hive/quantiles/GetNFromStringsSketchUDFTest.java
new file mode 100644
index 0000000..028cc45
--- /dev/null
+++ b/src/test/java/com/yahoo/sketches/hive/quantiles/GetNFromStringsSketchUDFTest.java
@@ -0,0 +1,41 @@
+/*
+ * Copyright 2019, Verizon Media.
+ * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
+ */
+
+package com.yahoo.sketches.hive.quantiles;
+
+import java.util.Comparator;
+
+import org.apache.hadoop.io.BytesWritable;
+
+import com.yahoo.sketches.ArrayOfItemsSerDe;
+import com.yahoo.sketches.ArrayOfStringsSerDe;
+import com.yahoo.sketches.quantiles.ItemsSketch;
+
+import org.testng.annotations.Test;
+import org.testng.Assert;
+
+public class GetNFromStringsSketchUDFTest {
+
+  static final Comparator<String> comparator = Comparator.naturalOrder();
+  static final ArrayOfItemsSerDe<String> serDe = new ArrayOfStringsSerDe();
+
+  @Test // a null sketch must yield a null result
+  public void nullSketch() {
+    final GetNFromStringsSketchUDF udf = new GetNFromStringsSketchUDF();
+    Assert.assertNull(udf.evaluate(null));
+  }
+
+  @Test // three updates must be reported as N = 3
+  public void normalCase() {
+    final ItemsSketch<String> input = ItemsSketch.getInstance(comparator);
+    for (final String item : new String[] {"a", "b", "c"}) {
+      input.update(item);
+    }
+    final Long n = new GetNFromStringsSketchUDF().evaluate(new BytesWritable(input.toByteArray(serDe)));
+    Assert.assertNotNull(n);
+    Assert.assertEquals(n, Long.valueOf(3));
+  }
+
+}