new tuple sketch UDFs
diff --git a/src/main/java/com/yahoo/sketches/pig/tuple/ArrayOfDoublesSketchToEstimateAndErrorBounds.java b/src/main/java/com/yahoo/sketches/pig/tuple/ArrayOfDoublesSketchToEstimateAndErrorBounds.java
new file mode 100644
index 0000000..13baefb
--- /dev/null
+++ b/src/main/java/com/yahoo/sketches/pig/tuple/ArrayOfDoublesSketchToEstimateAndErrorBounds.java
@@ -0,0 +1,47 @@
+/*
+ * Copyright 2017, Yahoo! Inc.
+ * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
+ */
+
+package com.yahoo.sketches.pig.tuple;
+
+import java.io.IOException;
+import java.util.Arrays;
+
+import org.apache.pig.EvalFunc;
+import org.apache.pig.data.DataByteArray;
+import org.apache.pig.data.Tuple;
+import org.apache.pig.data.TupleFactory;
+
+import com.yahoo.memory.Memory;
+import com.yahoo.sketches.tuple.ArrayOfDoublesSketch;
+import com.yahoo.sketches.tuple.ArrayOfDoublesSketches;
+
+/**
+ * This is a User Defined Function (UDF) for obtaining the unique count estimate
+ * along with a lower and upper bound from an ArrayOfDoublesSketch.
+ *
+ * <p>The result is a tuple with three double values: estimate, lower bound and upper bound.
+ * The bounds are given at 95.5% confidence.
+ *
+ * @author Alexander Saydakov
+ */
+public class ArrayOfDoublesSketchToEstimateAndErrorBounds extends EvalFunc<Tuple> {
+
+  /**
+   * @param input tuple whose first field is a DataByteArray holding a serialized sketch
+   * @return tuple (estimate, lower bound, upper bound), or null if there is no sketch
+   */
+  @Override
+  public Tuple exec(final Tuple input) throws IOException {
+    // a null first field is treated like missing input instead of a NullPointerException
+    if ((input == null) || (input.size() == 0) || (input.get(0) == null)) {
+      return null;
+    }
+    final DataByteArray dba = (DataByteArray) input.get(0);
+    final ArrayOfDoublesSketch sketch = ArrayOfDoublesSketches.wrapSketch(Memory.wrap(dba.get()));
+    return TupleFactory.getInstance().newTuple(Arrays.asList(
+      sketch.getEstimate(), sketch.getLowerBound(2), sketch.getUpperBound(2)));
+  }
+
+}
diff --git a/src/main/java/com/yahoo/sketches/pig/tuple/ArrayOfDoublesSketchToNumberOfRetainedEntries.java b/src/main/java/com/yahoo/sketches/pig/tuple/ArrayOfDoublesSketchToNumberOfRetainedEntries.java
new file mode 100644
index 0000000..67cab37
--- /dev/null
+++ b/src/main/java/com/yahoo/sketches/pig/tuple/ArrayOfDoublesSketchToNumberOfRetainedEntries.java
@@ -0,0 +1,40 @@
+/*
+ * Copyright 2017, Yahoo! Inc.
+ * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
+ */
+
+package com.yahoo.sketches.pig.tuple;
+
+import java.io.IOException;
+
+import org.apache.pig.EvalFunc;
+import org.apache.pig.data.DataByteArray;
+import org.apache.pig.data.Tuple;
+
+import com.yahoo.memory.Memory;
+import com.yahoo.sketches.tuple.ArrayOfDoublesSketch;
+import com.yahoo.sketches.tuple.ArrayOfDoublesSketches;
+
+/**
+ * This is a User Defined Function (UDF) for obtaining the number of retained entries
+ * from an ArrayOfDoublesSketch.
+ *
+ * <p>The result is an integer value, or null when no sketch is given.
+ *
+ * @author Alexander Saydakov
+ */
+public class ArrayOfDoublesSketchToNumberOfRetainedEntries extends EvalFunc<Integer> {
+  /**
+   * @param input tuple whose first field is a DataByteArray holding a serialized sketch
+   * @return number of retained entries; null if input is null, empty or has a null first field
+   */
+  @Override
+  public Integer exec(final Tuple input) throws IOException {
+    if ((input == null) || (input.size() == 0) || (input.get(0) == null)) {
+      return null;
+    }
+    final DataByteArray dba = (DataByteArray) input.get(0);
+    final ArrayOfDoublesSketch sketch = ArrayOfDoublesSketches.wrapSketch(Memory.wrap(dba.get()));
+    return sketch.getRetainedEntries();
+  }
+}
diff --git a/src/main/java/com/yahoo/sketches/pig/tuple/ArrayOfDoublesSketchToQuantilesSketch.java b/src/main/java/com/yahoo/sketches/pig/tuple/ArrayOfDoublesSketchToQuantilesSketch.java
new file mode 100644
index 0000000..35470b7
--- /dev/null
+++ b/src/main/java/com/yahoo/sketches/pig/tuple/ArrayOfDoublesSketchToQuantilesSketch.java
@@ -0,0 +1,75 @@
+/*
+ * Copyright 2017, Yahoo! Inc.
+ * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
+ */
+
+package com.yahoo.sketches.pig.tuple;
+
+import java.io.IOException;
+
+import org.apache.pig.EvalFunc;
+import org.apache.pig.data.DataByteArray;
+import org.apache.pig.data.Tuple;
+
+import com.yahoo.memory.Memory;
+import com.yahoo.sketches.quantiles.DoublesSketchBuilder;
+import com.yahoo.sketches.quantiles.UpdateDoublesSketch;
+import com.yahoo.sketches.tuple.ArrayOfDoublesSketch;
+import com.yahoo.sketches.tuple.ArrayOfDoublesSketchIterator;
+import com.yahoo.sketches.tuple.ArrayOfDoublesSketches;
+
+/**
+ * This UDF converts a given column of double values from an ArrayOfDoubles sketch
+ * to a quantiles DoublesSketch to further analyze the distribution of these values.
+ * The result will be a DataByteArray with serialized quantiles sketch (null for a null sketch).
+ */
+public class ArrayOfDoublesSketchToQuantilesSketch extends EvalFunc<DataByteArray> {
+  private final int k; // values <= 0 leave the quantiles sketch builder at its default k
+
+  /** Constructor with default parameter k for quantiles sketch */
+  public ArrayOfDoublesSketchToQuantilesSketch() {
+    this(0);
+  }
+
+  /**
+   * Constructor with k given as a string, the form in which Pig passes constructor arguments
+   * @param k string with the integer parameter k for quantiles sketch
+   */
+  public ArrayOfDoublesSketchToQuantilesSketch(final String k) {
+    this(Integer.parseInt(k));
+  }
+
+  /**
+   * Constructor with a given parameter k for quantiles sketch
+   * @param k parameter that determines the accuracy and size of the quantiles sketch
+   */
+  public ArrayOfDoublesSketchToQuantilesSketch(final int k) {
+    this.k = k;
+  }
+
+  @Override
+  public DataByteArray exec(final Tuple input) throws IOException {
+    if ((input == null) || (input.size() == 0) || (input.get(0) == null)) {
+      return null;
+    }
+    final DataByteArray dba = (DataByteArray) input.get(0);
+    final ArrayOfDoublesSketch sketch = ArrayOfDoublesSketches.wrapSketch(Memory.wrap(dba.get()));
+    int column = 1; // 1-based column number, used when the optional second field is absent
+    if (input.size() > 1) {
+      column = (int) input.get(1);
+      if (column < 1 || column > sketch.getNumValues()) {
+        throw new IllegalArgumentException("Column number out of range. The given sketch has "
+          + sketch.getNumValues() + " columns");
+      }
+    }
+    final DoublesSketchBuilder builder = UpdateDoublesSketch.builder();
+    if (k > 0) {
+      builder.setK(k);
+    }
+    final UpdateDoublesSketch qs = builder.build();
+    for (final ArrayOfDoublesSketchIterator it = sketch.iterator(); it.next();) {
+      qs.update(it.getValues()[column - 1]);
+    }
+    return new DataByteArray(qs.compact().toByteArray());
+  }
+}
diff --git a/src/test/java/com/yahoo/sketches/pig/tuple/ArrayOfDoublesSketchToEstimateAndErrorBoundsTest.java b/src/test/java/com/yahoo/sketches/pig/tuple/ArrayOfDoublesSketchToEstimateAndErrorBoundsTest.java
new file mode 100644
index 0000000..17a1491
--- /dev/null
+++ b/src/test/java/com/yahoo/sketches/pig/tuple/ArrayOfDoublesSketchToEstimateAndErrorBoundsTest.java
@@ -0,0 +1,82 @@
+/*
+ * Copyright 2017, Yahoo! Inc.
+ * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
+ */
+
+package com.yahoo.sketches.pig.tuple;
+
+import org.apache.pig.EvalFunc;
+import org.apache.pig.data.DataByteArray;
+import org.apache.pig.data.Tuple;
+import org.apache.pig.data.TupleFactory;
+import org.junit.Assert;
+import org.testng.annotations.Test;
+
+import com.yahoo.sketches.tuple.ArrayOfDoublesUpdatableSketch;
+import com.yahoo.sketches.tuple.ArrayOfDoublesUpdatableSketchBuilder;
+
+public class ArrayOfDoublesSketchToEstimateAndErrorBoundsTest {
+  // Assert below is org.junit.Assert, so assertEquals arguments are (expected, actual)
+  static final TupleFactory tupleFactory = TupleFactory.getInstance();
+
+  @Test
+  public void nullInput() throws Exception {
+    // a null input tuple yields a null result
+    EvalFunc<Tuple> func = new ArrayOfDoublesSketchToEstimateAndErrorBounds();
+    Assert.assertNull(func.exec(null));
+  }
+
+  @Test
+  public void emptyInputTuple() throws Exception {
+    // a tuple without fields yields a null result
+    EvalFunc<Tuple> func = new ArrayOfDoublesSketchToEstimateAndErrorBounds();
+    Assert.assertNull(func.exec(tupleFactory.newTuple()));
+  }
+
+  @Test
+  public void emptyInputSketch() throws Exception {
+    EvalFunc<Tuple> func = new ArrayOfDoublesSketchToEstimateAndErrorBounds();
+    ArrayOfDoublesUpdatableSketch sketch = new ArrayOfDoublesUpdatableSketchBuilder().build();
+    Tuple resultTuple = func.exec(tupleFactory.newTuple(new DataByteArray(sketch.compact().toByteArray())));
+    Assert.assertNotNull(resultTuple);
+    Assert.assertEquals(3, resultTuple.size());
+    Assert.assertEquals(0.0, resultTuple.get(0));
+    Assert.assertEquals(0.0, resultTuple.get(1));
+    Assert.assertEquals(0.0, resultTuple.get(2));
+  }
+
+  @Test
+  public void nonEmptyInputSketchExactMode() throws Exception {
+    EvalFunc<Tuple> func = new ArrayOfDoublesSketchToEstimateAndErrorBounds();
+    ArrayOfDoublesUpdatableSketch sketch = new ArrayOfDoublesUpdatableSketchBuilder().build();
+    sketch.update(1, new double[] {0});
+    Tuple resultTuple = func.exec(tupleFactory.newTuple(new DataByteArray(sketch.compact().toByteArray())));
+    Assert.assertNotNull(resultTuple);
+    Assert.assertEquals(3, resultTuple.size());
+    Assert.assertEquals(1.0, resultTuple.get(0));
+    Assert.assertEquals(1.0, resultTuple.get(1));
+    Assert.assertEquals(1.0, resultTuple.get(2));
+  }
+
+  @Test
+  public void nonEmptyInputSketchEstimationMode() throws Exception {
+    EvalFunc<Tuple> func = new ArrayOfDoublesSketchToEstimateAndErrorBounds();
+    ArrayOfDoublesUpdatableSketch sketch = new ArrayOfDoublesUpdatableSketchBuilder().build();
+    int numKeys = 10000; // to saturate the sketch with default number of nominal entries (4K)
+    for (int i = 0; i < numKeys; i++ ) {
+      sketch.update(i, new double[] {0});
+    }
+    Tuple resultTuple = func.exec(tupleFactory.newTuple(new DataByteArray(sketch.compact().toByteArray())));
+    Assert.assertNotNull(resultTuple);
+    Assert.assertEquals(3, resultTuple.size());
+    double estimate = (double) resultTuple.get(0);
+    double lowerBound = (double) resultTuple.get(1);
+    double upperBound = (double) resultTuple.get(2);
+    Assert.assertEquals(numKeys, estimate, numKeys * 0.04);
+    Assert.assertEquals(numKeys, lowerBound, numKeys * 0.04);
+    Assert.assertEquals(numKeys, upperBound, numKeys * 0.04);
+    Assert.assertTrue(lowerBound < estimate);
+    Assert.assertTrue(upperBound > estimate);
+  }
+
+}
diff --git a/src/test/java/com/yahoo/sketches/pig/tuple/ArrayOfDoublesSketchToNumberOfRetainedEntriesTest.java b/src/test/java/com/yahoo/sketches/pig/tuple/ArrayOfDoublesSketchToNumberOfRetainedEntriesTest.java
new file mode 100644
index 0000000..0f0e910
--- /dev/null
+++ b/src/test/java/com/yahoo/sketches/pig/tuple/ArrayOfDoublesSketchToNumberOfRetainedEntriesTest.java
@@ -0,0 +1,54 @@
+/*
+ * Copyright 2017, Yahoo! Inc.
+ * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
+ */
+
+package com.yahoo.sketches.pig.tuple;
+
+import org.apache.pig.EvalFunc;
+import org.apache.pig.data.DataByteArray;
+import org.apache.pig.data.TupleFactory;
+import org.junit.Assert;
+import org.testng.annotations.Test;
+
+import com.yahoo.sketches.tuple.ArrayOfDoublesUpdatableSketch;
+import com.yahoo.sketches.tuple.ArrayOfDoublesUpdatableSketchBuilder;
+
+public class ArrayOfDoublesSketchToNumberOfRetainedEntriesTest {
+  // Assert below is org.junit.Assert, so assertEquals arguments are (expected, actual)
+  static final TupleFactory tupleFactory = TupleFactory.getInstance();
+
+  @Test
+  public void nullInput() throws Exception {
+    // a null input tuple yields a null result
+    EvalFunc<Integer> func = new ArrayOfDoublesSketchToNumberOfRetainedEntries();
+    Assert.assertNull(func.exec(null));
+  }
+
+  @Test
+  public void emptyInputTuple() throws Exception {
+    // a tuple without fields yields a null result
+    EvalFunc<Integer> func = new ArrayOfDoublesSketchToNumberOfRetainedEntries();
+    Assert.assertNull(func.exec(tupleFactory.newTuple()));
+  }
+
+  @Test
+  public void emptyInputSketch() throws Exception {
+    EvalFunc<Integer> func = new ArrayOfDoublesSketchToNumberOfRetainedEntries();
+    ArrayOfDoublesUpdatableSketch sketch = new ArrayOfDoublesUpdatableSketchBuilder().build();
+    Integer result = func.exec(tupleFactory.newTuple(new DataByteArray(sketch.compact().toByteArray())));
+    Assert.assertNotNull(result);
+    Assert.assertEquals(0, (int) result);
+  }
+
+  @Test
+  public void nonEmptyInputSketch() throws Exception {
+    EvalFunc<Integer> func = new ArrayOfDoublesSketchToNumberOfRetainedEntries();
+    ArrayOfDoublesUpdatableSketch sketch = new ArrayOfDoublesUpdatableSketchBuilder().build();
+    sketch.update(1, new double[] {0});
+    Integer result = func.exec(tupleFactory.newTuple(new DataByteArray(sketch.compact().toByteArray())));
+    Assert.assertNotNull(result);
+    Assert.assertEquals(1, (int) result);
+  }
+
+}
diff --git a/src/test/java/com/yahoo/sketches/pig/tuple/ArrayOfDoublesSketchToQuantilesSketchTest.java b/src/test/java/com/yahoo/sketches/pig/tuple/ArrayOfDoublesSketchToQuantilesSketchTest.java
new file mode 100644
index 0000000..4adef55
--- /dev/null
+++ b/src/test/java/com/yahoo/sketches/pig/tuple/ArrayOfDoublesSketchToQuantilesSketchTest.java
@@ -0,0 +1,68 @@
+/*
+ * Copyright 2017, Yahoo! Inc.
+ * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
+ */
+
+package com.yahoo.sketches.pig.tuple;
+
+import java.util.Arrays;
+
+import org.apache.pig.EvalFunc;
+import org.apache.pig.data.DataByteArray;
+import org.apache.pig.data.TupleFactory;
+import org.testng.Assert;
+import org.testng.annotations.Test;
+
+import com.yahoo.memory.Memory;
+import com.yahoo.sketches.quantiles.DoublesSketch;
+import com.yahoo.sketches.tuple.ArrayOfDoublesUpdatableSketch;
+import com.yahoo.sketches.tuple.ArrayOfDoublesUpdatableSketchBuilder;
+
+public class ArrayOfDoublesSketchToQuantilesSketchTest {
+
+  static final TupleFactory tupleFactory = TupleFactory.getInstance();
+
+  @Test
+  public void nullInput() throws Exception {
+    // a null input tuple yields a null result
+    final EvalFunc<DataByteArray> udf = new ArrayOfDoublesSketchToQuantilesSketch();
+    Assert.assertNull(udf.exec(null));
+  }
+
+  @Test
+  public void emptyInputTuple() throws Exception {
+    // a tuple without fields yields a null result
+    final EvalFunc<DataByteArray> udf = new ArrayOfDoublesSketchToQuantilesSketch();
+    Assert.assertNull(udf.exec(tupleFactory.newTuple()));
+  }
+
+  @Test
+  public void emptyInputSketch() throws Exception {
+    final ArrayOfDoublesUpdatableSketch input = new ArrayOfDoublesUpdatableSketchBuilder().build();
+    final DataByteArray serialized = new DataByteArray(input.compact().toByteArray());
+    final EvalFunc<DataByteArray> udf = new ArrayOfDoublesSketchToQuantilesSketch();
+    final DataByteArray output = udf.exec(tupleFactory.newTuple(serialized));
+    Assert.assertNotNull(output);
+    Assert.assertTrue(DoublesSketch.wrap(Memory.wrap(output.get())).isEmpty());
+  }
+
+  @Test
+  public void nonEmptyInputSketchWithTwoColumnsExplicitK() throws Exception {
+    final int k = 256;
+    final ArrayOfDoublesUpdatableSketch input = new ArrayOfDoublesUpdatableSketchBuilder().setNumberOfValues(2).build();
+    input.update(1, new double[] {1.0, 2.0});
+    input.update(2, new double[] {10.0, 20.0});
+    final DataByteArray serialized = new DataByteArray(input.compact().toByteArray());
+    final EvalFunc<DataByteArray> udf = new ArrayOfDoublesSketchToQuantilesSketch(k);
+    // the second field asks for column 2, which was fed the values 2.0 and 20.0
+    final DataByteArray output = udf.exec(tupleFactory.newTuple(Arrays.asList(serialized, 2)));
+    Assert.assertNotNull(output);
+    final DoublesSketch converted = DoublesSketch.wrap(Memory.wrap(output.get()));
+    // the resulting quantiles sketch must carry the requested k and the column's extremes
+    Assert.assertFalse(converted.isEmpty());
+    Assert.assertEquals(converted.getK(), k);
+    Assert.assertEquals(converted.getMinValue(), 2.0);
+    Assert.assertEquals(converted.getMaxValue(), 20.0);
+  }
+
+}