use new core and memory, use wrap instead of heapify, new tuple udfs
diff --git a/pom.xml b/pom.xml
index 32ac422..ecc9802 100644
--- a/pom.xml
+++ b/pom.xml
@@ -145,13 +145,13 @@
<dependency>
<groupId>com.yahoo.datasketches</groupId>
<artifactId>sketches-core</artifactId>
- <version>0.10.0</version>
+ <version>0.10.1</version>
</dependency>
<dependency>
<groupId>com.yahoo.datasketches</groupId>
<artifactId>memory</artifactId>
- <version>0.10.2</version>
+ <version>0.10.3</version>
</dependency>
<!-- Pig -->
diff --git a/src/main/java/com/yahoo/sketches/pig/hll/AlgebraicFinal.java b/src/main/java/com/yahoo/sketches/pig/hll/AlgebraicFinal.java
index edb84b7..a89ba27 100644
--- a/src/main/java/com/yahoo/sketches/pig/hll/AlgebraicFinal.java
+++ b/src/main/java/com/yahoo/sketches/pig/hll/AlgebraicFinal.java
@@ -77,7 +77,7 @@
// due to system bagged outputs from multiple mapper Intermediate functions.
// Each dataTuple.DBA:sketch will merged into the union.
final DataByteArray dba = (DataByteArray) f0;
- union.update(HllSketch.heapify(Memory.wrap(dba.get())));
+ union.update(HllSketch.wrap(Memory.wrap(dba.get())));
} else { // we should never get here
throw new IllegalArgumentException("dataTuple.Field0 is not a DataBag or DataByteArray: "
+ f0.getClass().getName());
diff --git a/src/main/java/com/yahoo/sketches/pig/hll/AlgebraicIntermediate.java b/src/main/java/com/yahoo/sketches/pig/hll/AlgebraicIntermediate.java
index 48b49aa..045f3ce 100644
--- a/src/main/java/com/yahoo/sketches/pig/hll/AlgebraicIntermediate.java
+++ b/src/main/java/com/yahoo/sketches/pig/hll/AlgebraicIntermediate.java
@@ -79,7 +79,7 @@
// due to system bagged outputs from multiple mapper Intermediate functions.
// Each dataTuple.DBA:sketch will merged into the union.
final DataByteArray dba = (DataByteArray) f0;
- union.update(HllSketch.heapify(Memory.wrap(dba.get())));
+ union.update(HllSketch.wrap(Memory.wrap(dba.get())));
} else { // we should never get here
throw new IllegalArgumentException("dataTuple.Field0 is not a DataBag or DataByteArray: "
+ f0.getClass().getName());
diff --git a/src/main/java/com/yahoo/sketches/pig/hll/SketchToEstimate.java b/src/main/java/com/yahoo/sketches/pig/hll/SketchToEstimate.java
index 1e4a3c7..6fa0c80 100644
--- a/src/main/java/com/yahoo/sketches/pig/hll/SketchToEstimate.java
+++ b/src/main/java/com/yahoo/sketches/pig/hll/SketchToEstimate.java
@@ -11,6 +11,7 @@
import org.apache.pig.data.DataByteArray;
import org.apache.pig.data.Tuple;
+import com.yahoo.memory.Memory;
import com.yahoo.sketches.hll.HllSketch;
/**
@@ -26,7 +27,7 @@
return null;
}
final DataByteArray dba = (DataByteArray) sketchTuple.get(0);
- final HllSketch sketch = HllSketch.heapify(dba.get());
+ final HllSketch sketch = HllSketch.wrap(Memory.wrap(dba.get()));
return sketch.getEstimate();
}
diff --git a/src/main/java/com/yahoo/sketches/pig/hll/SketchToEstimateAndErrorBounds.java b/src/main/java/com/yahoo/sketches/pig/hll/SketchToEstimateAndErrorBounds.java
index 798cea2..94374aa 100644
--- a/src/main/java/com/yahoo/sketches/pig/hll/SketchToEstimateAndErrorBounds.java
+++ b/src/main/java/com/yahoo/sketches/pig/hll/SketchToEstimateAndErrorBounds.java
@@ -15,6 +15,7 @@
import org.apache.pig.impl.logicalLayer.FrontendException;
import org.apache.pig.impl.logicalLayer.schema.Schema;
+import com.yahoo.memory.Memory;
import com.yahoo.sketches.hll.HllSketch;
/**
@@ -34,7 +35,7 @@
return null;
}
final DataByteArray dba = (DataByteArray) sketchTuple.get(0);
- final HllSketch sketch = HllSketch.heapify(dba.get());
+ final HllSketch sketch = HllSketch.wrap(Memory.wrap(dba.get()));
final Tuple outputTuple = TupleFactory.getInstance().newTuple(3);
outputTuple.set(0, Double.valueOf(sketch.getEstimate()));
outputTuple.set(1, Double.valueOf(sketch.getLowerBound(2)));
diff --git a/src/main/java/com/yahoo/sketches/pig/hll/SketchToString.java b/src/main/java/com/yahoo/sketches/pig/hll/SketchToString.java
index 1c6c9de..c0bdbcd 100644
--- a/src/main/java/com/yahoo/sketches/pig/hll/SketchToString.java
+++ b/src/main/java/com/yahoo/sketches/pig/hll/SketchToString.java
@@ -11,6 +11,7 @@
import org.apache.pig.data.DataByteArray;
import org.apache.pig.data.Tuple;
+import com.yahoo.memory.Memory;
import com.yahoo.sketches.hll.HllSketch;
/**
@@ -58,7 +59,7 @@
return null;
}
final DataByteArray dba = (DataByteArray) sketchTuple.get(0);
- final HllSketch sketch = HllSketch.heapify(dba.get());
+ final HllSketch sketch = HllSketch.wrap(Memory.wrap(dba.get()));
return sketch.toString(true, hllDetail_, auxDetail_);
}
diff --git a/src/main/java/com/yahoo/sketches/pig/hll/UnionSketch.java b/src/main/java/com/yahoo/sketches/pig/hll/UnionSketch.java
index 814c17b..7cae70e 100644
--- a/src/main/java/com/yahoo/sketches/pig/hll/UnionSketch.java
+++ b/src/main/java/com/yahoo/sketches/pig/hll/UnionSketch.java
@@ -189,7 +189,7 @@
final byte type = innerTuple.getType(0);
if (type == DataType.BYTEARRAY) {
final DataByteArray dba = (DataByteArray) f0;
- union.update(HllSketch.heapify(Memory.wrap(dba.get())));
+ union.update(HllSketch.wrap(Memory.wrap(dba.get())));
} else {
throw new IllegalArgumentException("Field type was not DataType.BYTEARRAY: " + type);
}
diff --git a/src/main/java/com/yahoo/sketches/pig/quantiles/GetKFromDoublesSketch.java b/src/main/java/com/yahoo/sketches/pig/quantiles/GetKFromDoublesSketch.java
index e320f0f..71b1dfd 100644
--- a/src/main/java/com/yahoo/sketches/pig/quantiles/GetKFromDoublesSketch.java
+++ b/src/main/java/com/yahoo/sketches/pig/quantiles/GetKFromDoublesSketch.java
@@ -32,7 +32,7 @@
+ input.get(0).getClass().getSimpleName());
}
final DataByteArray dba = (DataByteArray) input.get(0);
- final DoublesSketch sketch = DoublesSketch.heapify(Memory.wrap(dba.get()));
+ final DoublesSketch sketch = DoublesSketch.wrap(Memory.wrap(dba.get()));
return sketch.getK();
}
diff --git a/src/main/java/com/yahoo/sketches/pig/quantiles/GetPmfFromDoublesSketch.java b/src/main/java/com/yahoo/sketches/pig/quantiles/GetPmfFromDoublesSketch.java
index 9bebd91..31b5c6c 100644
--- a/src/main/java/com/yahoo/sketches/pig/quantiles/GetPmfFromDoublesSketch.java
+++ b/src/main/java/com/yahoo/sketches/pig/quantiles/GetPmfFromDoublesSketch.java
@@ -36,7 +36,7 @@
+ input.get(0).getClass().getSimpleName());
}
final DataByteArray dba = (DataByteArray) input.get(0);
- final DoublesSketch sketch = DoublesSketch.heapify(Memory.wrap(dba.get()));
+ final DoublesSketch sketch = DoublesSketch.wrap(Memory.wrap(dba.get()));
final double[] splitPoints = new double[input.size() - 1];
for (int i = 1; i < input.size(); i++) {
diff --git a/src/main/java/com/yahoo/sketches/pig/quantiles/GetQuantileFromDoublesSketch.java b/src/main/java/com/yahoo/sketches/pig/quantiles/GetQuantileFromDoublesSketch.java
index 0802f0e..97d7060 100644
--- a/src/main/java/com/yahoo/sketches/pig/quantiles/GetQuantileFromDoublesSketch.java
+++ b/src/main/java/com/yahoo/sketches/pig/quantiles/GetQuantileFromDoublesSketch.java
@@ -34,7 +34,7 @@
+ input.get(0).getClass().getSimpleName());
}
final DataByteArray dba = (DataByteArray) input.get(0);
- final DoublesSketch sketch = DoublesSketch.heapify(Memory.wrap(dba.get()));
+ final DoublesSketch sketch = DoublesSketch.wrap(Memory.wrap(dba.get()));
if (!(input.get(1) instanceof Double)) {
throw new IllegalArgumentException("expected a double value as a fraction, got "
diff --git a/src/main/java/com/yahoo/sketches/pig/quantiles/GetQuantilesFromDoublesSketch.java b/src/main/java/com/yahoo/sketches/pig/quantiles/GetQuantilesFromDoublesSketch.java
index 6f16db7..bb6ab80 100644
--- a/src/main/java/com/yahoo/sketches/pig/quantiles/GetQuantilesFromDoublesSketch.java
+++ b/src/main/java/com/yahoo/sketches/pig/quantiles/GetQuantilesFromDoublesSketch.java
@@ -36,7 +36,7 @@
+ input.get(0).getClass().getSimpleName());
}
final DataByteArray dba = (DataByteArray) input.get(0);
- final DoublesSketch sketch = DoublesSketch.heapify(Memory.wrap(dba.get()));
+ final DoublesSketch sketch = DoublesSketch.wrap(Memory.wrap(dba.get()));
if (input.size() == 2) {
final Object arg = input.get(1);
@@ -48,17 +48,17 @@
throw new IllegalArgumentException("expected a double value as a fraction or an integer value"
+ " as a number of evenly spaced intervals, got " + arg.getClass().getSimpleName());
}
- } else { // more than one number - must be double fractions
- final double[] fractions = new double[input.size() - 1];
- for (int i = 1; i < input.size(); i++) {
- if (!(input.get(i) instanceof Double)) {
- throw new IllegalArgumentException("expected a double value as a fraction, got "
- + input.get(i).getClass().getSimpleName());
- }
- fractions[i - 1] = (double) input.get(i);
- }
- return Util.doubleArrayToTuple(sketch.getQuantiles(fractions));
}
+ // more than one number - must be double fractions
+ final double[] fractions = new double[input.size() - 1];
+ for (int i = 1; i < input.size(); i++) {
+ if (!(input.get(i) instanceof Double)) {
+ throw new IllegalArgumentException("expected a double value as a fraction, got "
+ + input.get(i).getClass().getSimpleName());
+ }
+ fractions[i - 1] = (double) input.get(i);
+ }
+ return Util.doubleArrayToTuple(sketch.getQuantiles(fractions));
}
}
diff --git a/src/main/java/com/yahoo/sketches/pig/tuple/ArrayOfDoublesSketchStats.java b/src/main/java/com/yahoo/sketches/pig/tuple/ArrayOfDoublesSketchStats.java
new file mode 100644
index 0000000..1cb3dd0
--- /dev/null
+++ b/src/main/java/com/yahoo/sketches/pig/tuple/ArrayOfDoublesSketchStats.java
@@ -0,0 +1,31 @@
+package com.yahoo.sketches.pig.tuple;
+
+import org.apache.commons.math3.stat.descriptive.SummaryStatistics;
+
+import com.yahoo.sketches.tuple.ArrayOfDoublesSketch;
+import com.yahoo.sketches.tuple.ArrayOfDoublesSketchIterator;
+
+class ArrayOfDoublesSketchStats {
+  /**
+   * Builds one SummaryStatistics per value column of the given sketch.
+   *
+   * @param sketch ArrayOfDoublesSketch whose entries are fed into the statistics.
+   * @return An array of sketch.getNumValues() SummaryStatistics; element i received values[i]
+   *     of every entry the sketch iterator yielded.
+   */
+  static SummaryStatistics[] sketchToSummaryStatistics(final ArrayOfDoublesSketch sketch) {
+    final int numValues = sketch.getNumValues();
+    final SummaryStatistics[] summaryStatistics = new SummaryStatistics[numValues];
+    for (int i = 0; i < numValues; i++) {
+      summaryStatistics[i] = new SummaryStatistics();
+    }
+    final ArrayOfDoublesSketchIterator it = sketch.iterator();
+    while (it.next()) {
+      // Read the entry's values once and reuse the same array for the loop bound.
+      final double[] values = it.getValues();
+      for (int i = 0; i < values.length; i++) {
+        summaryStatistics[i].addValue(values[i]);
+      }
+    }
+    return summaryStatistics;
+  }
+}
diff --git a/src/main/java/com/yahoo/sketches/pig/tuple/ArrayOfDoublesSketchToMeans.java b/src/main/java/com/yahoo/sketches/pig/tuple/ArrayOfDoublesSketchToMeans.java
new file mode 100644
index 0000000..04efb9a
--- /dev/null
+++ b/src/main/java/com/yahoo/sketches/pig/tuple/ArrayOfDoublesSketchToMeans.java
@@ -0,0 +1,49 @@
+/*
+ * Copyright 2017, Yahoo! Inc.
+ * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
+ */
+
+package com.yahoo.sketches.pig.tuple;
+
+import java.io.IOException;
+
+import org.apache.commons.math3.stat.descriptive.SummaryStatistics;
+import org.apache.pig.EvalFunc;
+import org.apache.pig.data.DataByteArray;
+import org.apache.pig.data.Tuple;
+import org.apache.pig.data.TupleFactory;
+
+import com.yahoo.memory.Memory;
+import com.yahoo.sketches.tuple.ArrayOfDoublesSketch;
+import com.yahoo.sketches.tuple.ArrayOfDoublesSketches;
+
+/**
+ * Pig UDF that reports the mean of each value column of an ArrayOfDoubles sketch.
+ * Output is a tuple of N doubles, N being the number of values the sketch keeps per key.
+ * A null or empty input tuple, or a sketch without retained entries, yields null.
+ */
+public class ArrayOfDoublesSketchToMeans extends EvalFunc<Tuple> {
+
+  /**
+   * @param input tuple whose field 0 is cast to a DataByteArray holding the serialized sketch
+   * @return tuple of per-column means, or null when there is nothing to summarize
+   */
+  @Override
+  public Tuple exec(final Tuple input) throws IOException {
+    if (input == null || input.size() == 0) {
+      return null;
+    }
+    final byte[] serializedSketch = ((DataByteArray) input.get(0)).get();
+    final ArrayOfDoublesSketch sketch = ArrayOfDoublesSketches.wrapSketch(Memory.wrap(serializedSketch));
+    if (sketch.getRetainedEntries() < 1) {
+      return null;
+    }
+    final SummaryStatistics[] columnStats = ArrayOfDoublesSketchStats.sketchToSummaryStatistics(sketch);
+    final int numValues = sketch.getNumValues();
+    final Tuple result = TupleFactory.getInstance().newTuple(numValues);
+    for (int column = 0; column < numValues; column++) {
+      result.set(column, columnStats[column].getMean());
+    }
+    return result;
+  }
+}
diff --git a/src/main/java/com/yahoo/sketches/pig/tuple/ArrayOfDoublesSketchToVariances.java b/src/main/java/com/yahoo/sketches/pig/tuple/ArrayOfDoublesSketchToVariances.java
new file mode 100644
index 0000000..6cab71a
--- /dev/null
+++ b/src/main/java/com/yahoo/sketches/pig/tuple/ArrayOfDoublesSketchToVariances.java
@@ -0,0 +1,49 @@
+/*
+ * Copyright 2017, Yahoo! Inc.
+ * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
+ */
+
+package com.yahoo.sketches.pig.tuple;
+
+import java.io.IOException;
+
+import org.apache.commons.math3.stat.descriptive.SummaryStatistics;
+import org.apache.pig.EvalFunc;
+import org.apache.pig.data.DataByteArray;
+import org.apache.pig.data.Tuple;
+import org.apache.pig.data.TupleFactory;
+
+import com.yahoo.memory.Memory;
+import com.yahoo.sketches.tuple.ArrayOfDoublesSketch;
+import com.yahoo.sketches.tuple.ArrayOfDoublesSketches;
+
+/**
+ * Pig UDF that reports the variance of each value column of an ArrayOfDoubles sketch.
+ * Output is a tuple of N doubles, N being the number of values the sketch keeps per key.
+ * A null or empty input tuple, or a sketch without retained entries, yields null.
+ */
+public class ArrayOfDoublesSketchToVariances extends EvalFunc<Tuple> {
+
+  /**
+   * @param input tuple whose field 0 is cast to a DataByteArray holding the serialized sketch
+   * @return tuple of per-column variances, or null when there is nothing to summarize
+   */
+  @Override
+  public Tuple exec(final Tuple input) throws IOException {
+    if (input == null || input.size() == 0) {
+      return null;
+    }
+    final byte[] serializedSketch = ((DataByteArray) input.get(0)).get();
+    final ArrayOfDoublesSketch sketch = ArrayOfDoublesSketches.wrapSketch(Memory.wrap(serializedSketch));
+    if (sketch.getRetainedEntries() < 1) {
+      return null;
+    }
+    final SummaryStatistics[] columnStats = ArrayOfDoublesSketchStats.sketchToSummaryStatistics(sketch);
+    final int numValues = sketch.getNumValues();
+    final Tuple result = TupleFactory.getInstance().newTuple(numValues);
+    for (int column = 0; column < numValues; column++) {
+      result.set(column, columnStats[column].getVariance());
+    }
+    return result;
+  }
+}
diff --git a/src/main/java/com/yahoo/sketches/pig/tuple/ArrayOfDoublesSketchesToPValueEstimates.java b/src/main/java/com/yahoo/sketches/pig/tuple/ArrayOfDoublesSketchesToPValueEstimates.java
index d2d59b2..420b87c 100644
--- a/src/main/java/com/yahoo/sketches/pig/tuple/ArrayOfDoublesSketchesToPValueEstimates.java
+++ b/src/main/java/com/yahoo/sketches/pig/tuple/ArrayOfDoublesSketchesToPValueEstimates.java
@@ -13,10 +13,10 @@
import org.apache.pig.EvalFunc;
import org.apache.pig.data.DataByteArray;
import org.apache.pig.data.Tuple;
+import org.apache.pig.data.TupleFactory;
import com.yahoo.memory.Memory;
import com.yahoo.sketches.tuple.ArrayOfDoublesSketch;
-import com.yahoo.sketches.tuple.ArrayOfDoublesSketchIterator;
import com.yahoo.sketches.tuple.ArrayOfDoublesSketches;
/**
@@ -52,46 +52,18 @@
}
// Get the statistical summary from each sketch
- final SummaryStatistics[] summaryA = sketchToSummaryStatistics(sketchA, numMetrics);
- final SummaryStatistics[] summaryB = sketchToSummaryStatistics(sketchB, numMetrics);
+ final SummaryStatistics[] summariesA = ArrayOfDoublesSketchStats.sketchToSummaryStatistics(sketchA);
+ final SummaryStatistics[] summariesB = ArrayOfDoublesSketchStats.sketchToSummaryStatistics(sketchB);
// Calculate the p-values
- final double[] pValues = new double[numMetrics];
final TTest tTest = new TTest();
+ final Tuple pValues = TupleFactory.getInstance().newTuple(numMetrics);
for (int i = 0; i < numMetrics; i++) {
// Pass the sampled values for each metric
- pValues[i] = tTest.tTest(summaryA[i], summaryB[i]);
+ pValues.set(i, tTest.tTest(summariesA[i], summariesB[i]));
}
- return Util.doubleArrayToTuple(pValues);
+ return pValues;
}
-
- /**
- * Convert sketch to a summary statistic.
- *
- * @param sketch ArrayOfDoublesSketch to convert to a summary statistic.
- * @param numMetrics Number of metrics (values) in the ArrayOfDoublesSketch.
- * @return A summary statistic.
- */
- private static SummaryStatistics[] sketchToSummaryStatistics(final ArrayOfDoublesSketch sketch,
- final int numMetrics) {
- // Store a summary statistic object for each metric
- final SummaryStatistics[] summaryStatistics = new SummaryStatistics[numMetrics];
- // Init the array
- for (int i = 0; i < numMetrics; i++) {
- summaryStatistics[i] = new SummaryStatistics();
- }
-
- // Add sketch values to the summary statistic object
- final ArrayOfDoublesSketchIterator it = sketch.iterator();
- while (it.next()) {
- for (int i = 0; i < it.getValues().length; i++) {
- summaryStatistics[i].addValue(it.getValues()[i]);
- }
- }
-
- return summaryStatistics;
- }
-
}
diff --git a/src/test/java/com/yahoo/sketches/pig/tuple/ArrayOfDoublesSketchToMeansTest.java b/src/test/java/com/yahoo/sketches/pig/tuple/ArrayOfDoublesSketchToMeansTest.java
new file mode 100644
index 0000000..10a1be0
--- /dev/null
+++ b/src/test/java/com/yahoo/sketches/pig/tuple/ArrayOfDoublesSketchToMeansTest.java
@@ -0,0 +1,71 @@
+package com.yahoo.sketches.pig.tuple;
+
+import java.util.Random;
+
+import org.apache.pig.EvalFunc;
+import org.apache.pig.data.DataByteArray;
+import org.apache.pig.data.Tuple;
+import org.apache.pig.data.TupleFactory;
+import org.testng.Assert;
+import org.testng.annotations.Test;
+
+import com.yahoo.sketches.tuple.ArrayOfDoublesUpdatableSketch;
+import com.yahoo.sketches.tuple.ArrayOfDoublesUpdatableSketchBuilder;
+
+public class ArrayOfDoublesSketchToMeansTest {
+
+  @Test
+  public void nullInput() throws Exception {
+    // a null tuple is expected to produce null
+    final EvalFunc<Tuple> udf = new ArrayOfDoublesSketchToMeans();
+    Assert.assertNull(udf.exec(null));
+  }
+
+  @Test
+  public void emptyInputTuple() throws Exception {
+    // a tuple without fields carries no sketch, so null is expected
+    final Tuple noFields = TupleFactory.getInstance().newTuple();
+    Assert.assertNull(new ArrayOfDoublesSketchToMeans().exec(noFields));
+  }
+
+  @Test
+  public void emptyInputSketch() throws Exception {
+    final ArrayOfDoublesUpdatableSketch empty = new ArrayOfDoublesUpdatableSketchBuilder().build();
+    final byte[] serialized = empty.compact().toByteArray();
+    final Tuple udfInput = PigUtil.objectsToTuple(new DataByteArray(serialized));
+    final EvalFunc<Tuple> udf = new ArrayOfDoublesSketchToMeans();
+    Assert.assertNull(udf.exec(udfInput));
+  }
+
+  @Test
+  public void oneEntryInputSketch() throws Exception {
+    final ArrayOfDoublesUpdatableSketch single = new ArrayOfDoublesUpdatableSketchBuilder().build();
+    single.update(1, new double[] {1});
+    final byte[] serialized = single.compact().toByteArray();
+    final Tuple udfInput = PigUtil.objectsToTuple(new DataByteArray(serialized));
+    final Tuple means = new ArrayOfDoublesSketchToMeans().exec(udfInput);
+    Assert.assertNotNull(means);
+    Assert.assertEquals(means.size(), 1);
+    Assert.assertEquals(means.get(0), 1.0);
+  }
+
+  @Test
+  public void manyEntriesTwoValuesInputSketch() throws Exception {
+    final ArrayOfDoublesUpdatableSketch sketch = new ArrayOfDoublesUpdatableSketchBuilder().setNumberOfValues(2).build();
+    final Random gaussianSource = new Random(0);
+    // 10000 keys saturate a sketch with the default number of nominal entries (4K)
+    for (int key = 0; key < 10000; key++) {
+      final double aroundZero = gaussianSource.nextGaussian();
+      final double aroundOne = gaussianSource.nextGaussian() + 1.0;
+      sketch.update(key, new double[] {aroundZero, aroundOne});
+    }
+    Assert.assertTrue(sketch.getRetainedEntries() >= 4096);
+    final DataByteArray serialized = new DataByteArray(sketch.compact().toByteArray());
+    final Tuple means = new ArrayOfDoublesSketchToMeans().exec(PigUtil.objectsToTuple(serialized));
+    Assert.assertNotNull(means);
+    Assert.assertEquals(means.size(), 2);
+    Assert.assertEquals((double) means.get(0), 0.0, 0.04);
+    Assert.assertEquals((double) means.get(1), 1.0, 0.04);
+  }
+
+}
diff --git a/src/test/java/com/yahoo/sketches/pig/tuple/ArrayOfDoublesSketchToVariancesTest.java b/src/test/java/com/yahoo/sketches/pig/tuple/ArrayOfDoublesSketchToVariancesTest.java
new file mode 100644
index 0000000..5604fe5
--- /dev/null
+++ b/src/test/java/com/yahoo/sketches/pig/tuple/ArrayOfDoublesSketchToVariancesTest.java
@@ -0,0 +1,71 @@
+package com.yahoo.sketches.pig.tuple;
+
+import java.util.Random;
+
+import org.apache.pig.EvalFunc;
+import org.apache.pig.data.DataByteArray;
+import org.apache.pig.data.Tuple;
+import org.apache.pig.data.TupleFactory;
+import org.testng.Assert;
+import org.testng.annotations.Test;
+
+import com.yahoo.sketches.tuple.ArrayOfDoublesUpdatableSketch;
+import com.yahoo.sketches.tuple.ArrayOfDoublesUpdatableSketchBuilder;
+
+public class ArrayOfDoublesSketchToVariancesTest {
+
+  @Test
+  public void nullInput() throws Exception {
+    // a null tuple is expected to produce null
+    final EvalFunc<Tuple> udf = new ArrayOfDoublesSketchToVariances();
+    Assert.assertNull(udf.exec(null));
+  }
+
+  @Test
+  public void emptyInputTuple() throws Exception {
+    // a tuple without fields carries no sketch, so null is expected
+    final Tuple noFields = TupleFactory.getInstance().newTuple();
+    Assert.assertNull(new ArrayOfDoublesSketchToVariances().exec(noFields));
+  }
+
+  @Test
+  public void emptyInputSketch() throws Exception {
+    final ArrayOfDoublesUpdatableSketch empty = new ArrayOfDoublesUpdatableSketchBuilder().build();
+    final byte[] serialized = empty.compact().toByteArray();
+    final Tuple udfInput = PigUtil.objectsToTuple(new DataByteArray(serialized));
+    final EvalFunc<Tuple> udf = new ArrayOfDoublesSketchToVariances();
+    Assert.assertNull(udf.exec(udfInput));
+  }
+
+  @Test
+  public void oneEntryInputSketch() throws Exception {
+    final ArrayOfDoublesUpdatableSketch single = new ArrayOfDoublesUpdatableSketchBuilder().build();
+    single.update(1, new double[] {1});
+    final byte[] serialized = single.compact().toByteArray();
+    final Tuple udfInput = PigUtil.objectsToTuple(new DataByteArray(serialized));
+    final Tuple variances = new ArrayOfDoublesSketchToVariances().exec(udfInput);
+    Assert.assertNotNull(variances);
+    Assert.assertEquals(variances.size(), 1);
+    Assert.assertEquals(variances.get(0), 0.0);
+  }
+
+  @Test
+  public void manyEntriesTwoValuesInputSketch() throws Exception {
+    final ArrayOfDoublesUpdatableSketch sketch = new ArrayOfDoublesUpdatableSketchBuilder().setNumberOfValues(2).build();
+    final Random gaussianSource = new Random(0);
+    // 10000 keys saturate a sketch with the default number of nominal entries (4K)
+    for (int key = 0; key < 10000; key++) {
+      final double unitSpread = gaussianSource.nextGaussian();
+      final double tenfoldSpread = gaussianSource.nextGaussian() * 10.0;
+      sketch.update(key, new double[] {unitSpread, tenfoldSpread});
+    }
+    Assert.assertTrue(sketch.getRetainedEntries() >= 4096);
+    final DataByteArray serialized = new DataByteArray(sketch.compact().toByteArray());
+    final Tuple variances = new ArrayOfDoublesSketchToVariances().exec(PigUtil.objectsToTuple(serialized));
+    Assert.assertNotNull(variances);
+    Assert.assertEquals(variances.size(), 2);
+    Assert.assertEquals((double) variances.get(0), 1.0, 0.04);
+    Assert.assertEquals((double) variances.get(1), 100.0, 100.0 * 0.04); // variance = 10^2, within 4%
+  }
+
+}
diff --git a/src/test/java/com/yahoo/sketches/pig/tuple/ArrayOfDoublesSketchesToPValueEstimatesTest.java b/src/test/java/com/yahoo/sketches/pig/tuple/ArrayOfDoublesSketchesToPValueEstimatesTest.java
index 7f1a288..e5a4451 100644
--- a/src/test/java/com/yahoo/sketches/pig/tuple/ArrayOfDoublesSketchesToPValueEstimatesTest.java
+++ b/src/test/java/com/yahoo/sketches/pig/tuple/ArrayOfDoublesSketchesToPValueEstimatesTest.java
@@ -16,7 +16,6 @@
import com.yahoo.sketches.tuple.ArrayOfDoublesUpdatableSketchBuilder;
import org.apache.commons.math3.stat.inference.TTest;
-import org.apache.commons.math3.stat.StatUtils;
import java.util.Random;
@@ -26,6 +25,7 @@
public class ArrayOfDoublesSketchesToPValueEstimatesTest {
/**
* Check null input to UDF.
+ * @throws Exception on any unexpected failure while running the test
*/
@Test
public void nullInput() throws Exception {
@@ -38,6 +38,7 @@
/**
* Check input of empty tuple.
+ * @throws Exception on any unexpected failure while running the test
*/
@Test
public void emptyInput() throws Exception {
@@ -50,6 +51,7 @@
/**
* Check input of single empty sketch.
+ * @throws Exception on any unexpected failure while running the test
*/
@Test
public void oneEmptySketch() throws Exception {
@@ -66,6 +68,7 @@
/**
* Check input of two empty sketches.
+ * @throws Exception on any unexpected failure while running the test
*/
@Test
public void twoEmptySketches() throws Exception {
@@ -84,6 +87,7 @@
/**
* Check p-value for the smoker data set. Single metric.
+ * @throws Exception on any unexpected failure while running the test
*/
@Test
public void smokerDatasetSingleMetric() throws Exception {
@@ -128,6 +132,7 @@
/**
* Check p-value for a large data set.
+ * @throws Exception on any unexpected failure while running the test
*/
@Test
public void largeDataSet() throws Exception {
@@ -184,6 +189,7 @@
/**
* Check p-value for two metrics at the same time.
+ * @throws Exception on any unexpected failure while running the test
*/
@Test
public void twoMetrics() throws Exception {
@@ -229,6 +235,7 @@
/**
* Check with sketch having only one input.
+ * @throws Exception on any unexpected failure while running the test
*/
@Test
public void sketchWithSingleValue() throws Exception {