Merge pull request #14 from DataSketches/quantiles
Quantiles Sketch UDFs
diff --git a/src/main/java/com/yahoo/sketches/pig/tuple/DoubleSummarySketchToPercentile.java b/src/main/java/com/yahoo/sketches/pig/tuple/DoubleSummarySketchToPercentile.java
index 7ffc771..58a3535 100644
--- a/src/main/java/com/yahoo/sketches/pig/tuple/DoubleSummarySketchToPercentile.java
+++ b/src/main/java/com/yahoo/sketches/pig/tuple/DoubleSummarySketchToPercentile.java
@@ -9,7 +9,6 @@
import org.apache.pig.EvalFunc;
import org.apache.pig.data.DataByteArray;
import org.apache.pig.data.Tuple;
-import org.apache.pig.data.TupleFactory;
import com.yahoo.sketches.memory.NativeMemory;
import com.yahoo.sketches.quantiles.QuantilesSketch;
@@ -27,12 +26,12 @@
* distribution (the number separating the higher half of a probability
* distribution from the lower half).
*/
-public class DoubleSummarySketchToPercentile extends EvalFunc<Tuple> {
+public class DoubleSummarySketchToPercentile extends EvalFunc<Double> {
private static final int QUANTILES_SKETCH_SIZE = 1024;
@Override
- public Tuple exec(Tuple input) throws IOException {
+ public Double exec(Tuple input) throws IOException {
if (input.size() != 2) throw new IllegalArgumentException("expected two inputs: sketch and pecentile");
DataByteArray dba = (DataByteArray) input.get(0);
@@ -46,9 +45,7 @@
while (it.next()) {
qs.update(it.getSummary().getValue());
}
- Tuple output = TupleFactory.getInstance().newTuple(1);
- output.set(0, qs.getQuantile(percentile / 100));
- return output;
+ return qs.getQuantile(percentile / 100);
}
}
diff --git a/src/test/java/com/yahoo/sketches/pig/tuple/DoubleSummarySketchToPercentileTest.java b/src/test/java/com/yahoo/sketches/pig/tuple/DoubleSummarySketchToPercentileTest.java
index 99123d6..90a04f7 100644
--- a/src/test/java/com/yahoo/sketches/pig/tuple/DoubleSummarySketchToPercentileTest.java
+++ b/src/test/java/com/yahoo/sketches/pig/tuple/DoubleSummarySketchToPercentileTest.java
@@ -6,9 +6,13 @@
import org.testng.annotations.Test;
import org.testng.Assert;
+
+import java.util.Arrays;
+
import org.apache.pig.EvalFunc;
import org.apache.pig.data.DataByteArray;
import org.apache.pig.data.Tuple;
+import org.apache.pig.data.TupleFactory;
import com.yahoo.sketches.tuple.UpdatableSketch;
import com.yahoo.sketches.tuple.DoubleSummary;
@@ -18,38 +22,34 @@
public class DoubleSummarySketchToPercentileTest {
@Test
public void emptySketch() throws Exception {
- EvalFunc<Tuple> func = new DoubleSummarySketchToPercentile();
+ EvalFunc<Double> func = new DoubleSummarySketchToPercentile();
UpdatableSketch<Double, DoubleSummary> sketch = new UpdatableSketchBuilder<Double, DoubleSummary>(new DoubleSummaryFactory()).build();
- Tuple inputTuple = PigUtil.objectsToTuple(new DataByteArray(sketch.compact().toByteArray()), 0.0);
- Tuple resultTuple = func.exec(inputTuple);
- Assert.assertNotNull(resultTuple);
- Assert.assertEquals(resultTuple.size(), 1);
- Assert.assertEquals(resultTuple.get(0), Double.POSITIVE_INFINITY);
+ Tuple inputTuple = TupleFactory.getInstance().newTuple(Arrays.asList(new DataByteArray(sketch.compact().toByteArray()), 0.0));
+ double result = func.exec(inputTuple);
+ Assert.assertEquals(result, Double.POSITIVE_INFINITY);
}
@Test
public void normalCase() throws Exception {
- EvalFunc<Tuple> func = new DoubleSummarySketchToPercentile();
+ EvalFunc<Double> func = new DoubleSummarySketchToPercentile();
UpdatableSketch<Double, DoubleSummary> sketch = new UpdatableSketchBuilder<Double, DoubleSummary>(new DoubleSummaryFactory()).build();
int iterations = 100000;
for (int i = 0; i < iterations; i++) sketch.update(i, (double) i);
for (int i = 0; i < iterations; i++) sketch.update(i, (double) i);
Tuple inputTuple = PigUtil.objectsToTuple(new DataByteArray(sketch.compact().toByteArray()), 50.0);
- Tuple resultTuple = func.exec(inputTuple);
- Assert.assertNotNull(resultTuple);
- Assert.assertEquals(resultTuple.size(), 1);
- Assert.assertEquals((double) resultTuple.get(0), iterations, iterations * 0.02);
+ double result = func.exec(inputTuple);
+ Assert.assertEquals(result, iterations, iterations * 0.02);
}
@Test(expectedExceptions = IllegalArgumentException.class)
public void wrongNumberOfInputs() throws Exception {
- EvalFunc<Tuple> func = new DoubleSummarySketchToPercentile();
+ EvalFunc<Double> func = new DoubleSummarySketchToPercentile();
func.exec(PigUtil.objectsToTuple(1.0));
}
@Test(expectedExceptions = IllegalArgumentException.class)
public void percentileOutOfRange() throws Exception {
- EvalFunc<Tuple> func = new DoubleSummarySketchToPercentile();
+ EvalFunc<Double> func = new DoubleSummarySketchToPercentile();
UpdatableSketch<Double, DoubleSummary> sketch = new UpdatableSketchBuilder<Double, DoubleSummary>(new DoubleSummaryFactory()).build();
func.exec(PigUtil.objectsToTuple(new DataByteArray(sketch.compact().toByteArray()), 200.0));
}