Merge pull request #14 from DataSketches/quantiles

Quantiles Sketch UDFs
diff --git a/src/main/java/com/yahoo/sketches/pig/tuple/DoubleSummarySketchToPercentile.java b/src/main/java/com/yahoo/sketches/pig/tuple/DoubleSummarySketchToPercentile.java
index 7ffc771..58a3535 100644
--- a/src/main/java/com/yahoo/sketches/pig/tuple/DoubleSummarySketchToPercentile.java
+++ b/src/main/java/com/yahoo/sketches/pig/tuple/DoubleSummarySketchToPercentile.java
@@ -9,7 +9,6 @@
 import org.apache.pig.EvalFunc;

 import org.apache.pig.data.DataByteArray;

 import org.apache.pig.data.Tuple;

-import org.apache.pig.data.TupleFactory;

 

 import com.yahoo.sketches.memory.NativeMemory;

 import com.yahoo.sketches.quantiles.QuantilesSketch;

@@ -27,12 +26,12 @@
  * distribution (the number separating the higher half of a probability

  * distribution from the lower half).

  */

-public class DoubleSummarySketchToPercentile extends EvalFunc<Tuple> {

+public class DoubleSummarySketchToPercentile extends EvalFunc<Double> {

 

   private static final int QUANTILES_SKETCH_SIZE = 1024;

 

   @Override

-  public Tuple exec(Tuple input) throws IOException {

+  public Double exec(Tuple input) throws IOException {

     if (input.size() != 2) throw new IllegalArgumentException("expected two inputs: sketch and pecentile");

 

     DataByteArray dba = (DataByteArray) input.get(0);

@@ -46,9 +45,7 @@
     while (it.next()) {

       qs.update(it.getSummary().getValue());

     }

-    Tuple output = TupleFactory.getInstance().newTuple(1);

-    output.set(0, qs.getQuantile(percentile / 100));

-    return output;

+    return qs.getQuantile(percentile / 100);

   }

 

 }

diff --git a/src/test/java/com/yahoo/sketches/pig/tuple/DoubleSummarySketchToPercentileTest.java b/src/test/java/com/yahoo/sketches/pig/tuple/DoubleSummarySketchToPercentileTest.java
index 99123d6..90a04f7 100644
--- a/src/test/java/com/yahoo/sketches/pig/tuple/DoubleSummarySketchToPercentileTest.java
+++ b/src/test/java/com/yahoo/sketches/pig/tuple/DoubleSummarySketchToPercentileTest.java
@@ -6,9 +6,13 @@
 

 import org.testng.annotations.Test;

 import org.testng.Assert;

+

+import java.util.Arrays;

+

 import org.apache.pig.EvalFunc;

 import org.apache.pig.data.DataByteArray;

 import org.apache.pig.data.Tuple;

+import org.apache.pig.data.TupleFactory;

 

 import com.yahoo.sketches.tuple.UpdatableSketch;

 import com.yahoo.sketches.tuple.DoubleSummary;

@@ -18,38 +22,34 @@
 public class DoubleSummarySketchToPercentileTest {

   @Test

   public void emptySketch() throws Exception {

-    EvalFunc<Tuple> func = new DoubleSummarySketchToPercentile();

+    EvalFunc<Double> func = new DoubleSummarySketchToPercentile();

     UpdatableSketch<Double, DoubleSummary> sketch = new UpdatableSketchBuilder<Double, DoubleSummary>(new DoubleSummaryFactory()).build();

-    Tuple inputTuple = PigUtil.objectsToTuple(new DataByteArray(sketch.compact().toByteArray()), 0.0);

-    Tuple resultTuple = func.exec(inputTuple);

-    Assert.assertNotNull(resultTuple);

-    Assert.assertEquals(resultTuple.size(), 1);

-    Assert.assertEquals(resultTuple.get(0), Double.POSITIVE_INFINITY);

+    Tuple inputTuple = TupleFactory.getInstance().newTuple(Arrays.asList(new DataByteArray(sketch.compact().toByteArray()), 0.0));

+    double result = func.exec(inputTuple);

+    Assert.assertEquals(result, Double.POSITIVE_INFINITY);

   }

 

   @Test

   public void normalCase() throws Exception {

-    EvalFunc<Tuple> func = new DoubleSummarySketchToPercentile();

+    EvalFunc<Double> func = new DoubleSummarySketchToPercentile();

     UpdatableSketch<Double, DoubleSummary> sketch = new UpdatableSketchBuilder<Double, DoubleSummary>(new DoubleSummaryFactory()).build();

     int iterations = 100000;

     for (int i = 0; i < iterations; i++) sketch.update(i, (double) i);

     for (int i = 0; i < iterations; i++) sketch.update(i, (double) i);

     Tuple inputTuple = PigUtil.objectsToTuple(new DataByteArray(sketch.compact().toByteArray()), 50.0);

-    Tuple resultTuple = func.exec(inputTuple);

-    Assert.assertNotNull(resultTuple);

-    Assert.assertEquals(resultTuple.size(), 1);

-    Assert.assertEquals((double) resultTuple.get(0), iterations, iterations * 0.02);

+    double result = func.exec(inputTuple);

+    Assert.assertEquals(result, iterations, iterations * 0.02);

   }

 

   @Test(expectedExceptions = IllegalArgumentException.class)

   public void wrongNumberOfInputs() throws Exception {

-    EvalFunc<Tuple> func = new DoubleSummarySketchToPercentile();

+    EvalFunc<Double> func = new DoubleSummarySketchToPercentile();

     func.exec(PigUtil.objectsToTuple(1.0));

   }

 

   @Test(expectedExceptions = IllegalArgumentException.class)

   public void percentileOutOfRange() throws Exception {

-    EvalFunc<Tuple> func = new DoubleSummarySketchToPercentile();

+    EvalFunc<Double> func = new DoubleSummarySketchToPercentile();

     UpdatableSketch<Double, DoubleSummary> sketch = new UpdatableSketchBuilder<Double, DoubleSummary>(new DoubleSummaryFactory()).build();

     func.exec(PigUtil.objectsToTuple(new DataByteArray(sketch.compact().toByteArray()), 200.0));

   }