Merge pull request #37 from DataSketches/kll-cdf

GetCdfFromSketchUDF
diff --git a/src/main/java/com/yahoo/sketches/hive/kll/GetNFromSketchUDF.java b/src/main/java/com/yahoo/sketches/hive/kll/GetNFromSketchUDF.java
new file mode 100644
index 0000000..e708d58
--- /dev/null
+++ b/src/main/java/com/yahoo/sketches/hive/kll/GetNFromSketchUDF.java
@@ -0,0 +1,30 @@
+/*
+ * Copyright 2019, Verizon Media.
+ * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
+ */
+
+package com.yahoo.sketches.hive.kll;
+
+import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.ql.exec.UDF;
+import org.apache.hadoop.io.BytesWritable;
+
+import com.yahoo.memory.Memory;
+import com.yahoo.sketches.kll.KllFloatsSketch;
+
+@Description(name = "GetN", value = "_FUNC_(sketch)",
+extended = " Returns the total number of observed input values (stream length) from a given KllFloatsSketch.")
+public class GetNFromSketchUDF extends UDF {
+
+  /**
+   * Returns N (the stream length) from a given serialized KllFloatsSketch.
+   * @param serializedSketch serialized sketch; may be null
+   * @return stream length, or null if the input is null
+   */
+  public Long evaluate(final BytesWritable serializedSketch) {
+    if (serializedSketch == null) { return null; }
+    // copyBytes() is exactly getLength() long; getBytes() may expose stale padding of a reused buffer
+    return KllFloatsSketch.heapify(Memory.wrap(serializedSketch.copyBytes())).getN();
+  }
+
+}
diff --git a/src/main/java/com/yahoo/sketches/hive/quantiles/GetNFromDoublesSketchUDF.java b/src/main/java/com/yahoo/sketches/hive/quantiles/GetNFromDoublesSketchUDF.java
new file mode 100644
index 0000000..93b0b69
--- /dev/null
+++ b/src/main/java/com/yahoo/sketches/hive/quantiles/GetNFromDoublesSketchUDF.java
@@ -0,0 +1,30 @@
+/*
+ * Copyright 2019, Verizon Media.
+ * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
+ */
+
+package com.yahoo.sketches.hive.quantiles;
+
+import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.ql.exec.UDF;
+import org.apache.hadoop.io.BytesWritable;
+
+import com.yahoo.memory.Memory;
+import com.yahoo.sketches.quantiles.DoublesSketch;
+
+@Description(name = "GetN", value = "_FUNC_(sketch)",
+extended = " Returns the total number of observed input values (stream length) from a given DoublesSketch.")
+public class GetNFromDoublesSketchUDF extends UDF {
+
+  /**
+   * Returns N (the stream length) from a given serialized DoublesSketch.
+   * @param serializedSketch serialized sketch; may be null
+   * @return stream length, or null if the input is null
+   */
+  public Long evaluate(final BytesWritable serializedSketch) {
+    if (serializedSketch == null) { return null; }
+    // copyBytes() is exactly getLength() long; getBytes() may expose stale padding of a reused buffer
+    return DoublesSketch.wrap(Memory.wrap(serializedSketch.copyBytes())).getN();
+  }
+
+}
diff --git a/src/main/java/com/yahoo/sketches/hive/quantiles/GetNFromStringsSketchUDF.java b/src/main/java/com/yahoo/sketches/hive/quantiles/GetNFromStringsSketchUDF.java
new file mode 100644
index 0000000..67d1772
--- /dev/null
+++ b/src/main/java/com/yahoo/sketches/hive/quantiles/GetNFromStringsSketchUDF.java
@@ -0,0 +1,37 @@
+/*
+ * Copyright 2019, Verizon Media.
+ * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
+ */
+
+package com.yahoo.sketches.hive.quantiles;
+
+import java.util.Comparator;
+
+import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.ql.exec.UDF;
+import org.apache.hadoop.io.BytesWritable;
+
+import com.yahoo.memory.Memory;
+import com.yahoo.sketches.ArrayOfStringsSerDe;
+import com.yahoo.sketches.quantiles.ItemsSketch;
+
+@Description(name = "GetN", value = "_FUNC_(sketch)",
+extended = " Returns the total number of observed input values (stream length) from a given ItemsSketch<String>.")
+public class GetNFromStringsSketchUDF extends UDF {
+
+  /**
+   * Returns N (the stream length) from a given serialized ItemsSketch of strings.
+   * @param serializedSketch serialized sketch; may be null
+   * @return stream length, or null if the input is null
+   */
+  public Long evaluate(final BytesWritable serializedSketch) {
+    if (serializedSketch == null) { return null; }
+    // copyBytes() is exactly getLength() long; getBytes() may expose stale padding of a reused buffer
+    final ItemsSketch<String> sketch = ItemsSketch.getInstance(
+      Memory.wrap(serializedSketch.copyBytes()),
+      Comparator.naturalOrder(), new ArrayOfStringsSerDe()
+    );
+    return sketch.getN();
+  }
+
+}
diff --git a/src/test/java/com/yahoo/sketches/hive/kll/GetNFromSektchUDFTest.java b/src/test/java/com/yahoo/sketches/hive/kll/GetNFromSektchUDFTest.java
new file mode 100644
index 0000000..c0573f9
--- /dev/null
+++ b/src/test/java/com/yahoo/sketches/hive/kll/GetNFromSektchUDFTest.java
@@ -0,0 +1,34 @@
+/*
+ * Copyright 2019, Verizon Media.
+ * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
+ */
+
+package com.yahoo.sketches.hive.kll;
+
+import org.apache.hadoop.io.BytesWritable;
+import org.testng.Assert;
+import org.testng.annotations.Test;
+
+import com.yahoo.sketches.kll.KllFloatsSketch;
+
+/** Checks GetNFromSketchUDF on a null input and on a small populated sketch. */
+public class GetNFromSektchUDFTest {
+
+  @Test
+  public void nullSketch() {
+    Assert.assertNull(new GetNFromSketchUDF().evaluate(null));
+  }
+
+  @Test
+  public void normalCase() {
+    final KllFloatsSketch kll = new KllFloatsSketch();
+    for (int value = 1; value <= 4; value++) {
+      kll.update(value);
+    }
+    final BytesWritable serialized = new BytesWritable(kll.toByteArray());
+    final Long n = new GetNFromSketchUDF().evaluate(serialized);
+    Assert.assertNotNull(n);
+    Assert.assertEquals(n, Long.valueOf(4));
+  }
+
+}
diff --git a/src/test/java/com/yahoo/sketches/hive/quantiles/GetNFromDoublesSektchUDFTest.java b/src/test/java/com/yahoo/sketches/hive/quantiles/GetNFromDoublesSektchUDFTest.java
new file mode 100644
index 0000000..4cb0fcd
--- /dev/null
+++ b/src/test/java/com/yahoo/sketches/hive/quantiles/GetNFromDoublesSektchUDFTest.java
@@ -0,0 +1,34 @@
+/*
+ * Copyright 2019, Verizon Media.
+ * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
+ */
+
+package com.yahoo.sketches.hive.quantiles;
+
+import com.yahoo.sketches.quantiles.DoublesSketch;
+import com.yahoo.sketches.quantiles.UpdateDoublesSketch;
+
+import org.testng.annotations.Test;
+import org.apache.hadoop.io.BytesWritable;
+import org.testng.Assert;
+
+/** Checks GetNFromDoublesSketchUDF on a null input and on a small populated sketch. */
+public class GetNFromDoublesSektchUDFTest {
+
+  @Test
+  public void nullSketch() {
+    Assert.assertNull(new GetNFromDoublesSketchUDF().evaluate(null));
+  }
+
+  @Test
+  public void normalCase() {
+    final UpdateDoublesSketch doubles = DoublesSketch.builder().build();
+    for (int value = 1; value <= 3; value++) {
+      doubles.update(value);
+    }
+    final Long n = new GetNFromDoublesSketchUDF().evaluate(new BytesWritable(doubles.toByteArray()));
+    Assert.assertNotNull(n);
+    Assert.assertEquals(n, Long.valueOf(3));
+  }
+
+}
diff --git a/src/test/java/com/yahoo/sketches/hive/quantiles/GetNFromStringsSketchUDFTest.java b/src/test/java/com/yahoo/sketches/hive/quantiles/GetNFromStringsSketchUDFTest.java
new file mode 100644
index 0000000..028cc45
--- /dev/null
+++ b/src/test/java/com/yahoo/sketches/hive/quantiles/GetNFromStringsSketchUDFTest.java
@@ -0,0 +1,41 @@
+/*
+ * Copyright 2019, Verizon Media.
+ * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
+ */
+
+package com.yahoo.sketches.hive.quantiles;
+
+import java.util.Comparator;
+
+import org.apache.hadoop.io.BytesWritable;
+
+import com.yahoo.sketches.ArrayOfItemsSerDe;
+import com.yahoo.sketches.ArrayOfStringsSerDe;
+import com.yahoo.sketches.quantiles.ItemsSketch;
+
+import org.testng.annotations.Test;
+import org.testng.Assert;
+
+/** Checks GetNFromStringsSketchUDF on a null input and on a small populated sketch. */
+public class GetNFromStringsSketchUDFTest {
+
+  static final Comparator<String> comparator = Comparator.naturalOrder();
+  static final ArrayOfItemsSerDe<String> serDe = new ArrayOfStringsSerDe();
+
+  @Test
+  public void nullSketch() {
+    Assert.assertNull(new GetNFromStringsSketchUDF().evaluate(null));
+  }
+
+  @Test
+  public void normalCase() {
+    final ItemsSketch<String> strings = ItemsSketch.getInstance(comparator);
+    for (final String item : new String[] {"a", "b", "c"}) {
+      strings.update(item);
+    }
+    final Long n = new GetNFromStringsSketchUDF().evaluate(new BytesWritable(strings.toByteArray(serDe)));
+    Assert.assertNotNull(n);
+    Assert.assertEquals(n, Long.valueOf(3));
+  }
+
+}