blob: 1347ded0f8303c2391296fcce528705f8f374eb2 [file] [log] [blame]
/*
* Copyright 2016, Yahoo! Inc.
* Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
*/
package com.yahoo.sketches.pig.tuple;
import java.io.IOException;
import org.apache.pig.EvalFunc;
import org.apache.pig.data.DataByteArray;
import org.apache.pig.data.Tuple;
import com.yahoo.memory.Memory;
import com.yahoo.sketches.quantiles.DoublesSketch;
import com.yahoo.sketches.quantiles.UpdateDoublesSketch;
import com.yahoo.sketches.tuple.Sketch;
import com.yahoo.sketches.tuple.SketchIterator;
import com.yahoo.sketches.tuple.Sketches;
import com.yahoo.sketches.tuple.SummaryDeserializer;
import com.yahoo.sketches.tuple.adouble.DoubleSummary;
import com.yahoo.sketches.tuple.adouble.DoubleSummaryDeserializer;
/**
* This UDF is to get a percentile value from a Sketch<DoubleSummary>.
* The values from DoubleSummary objects in the sketch are extracted,
* and a single value with the given rank is returned. The rank is in
* percent. For example, 50th percentile is the median value of the
* distribution (the number separating the higher half of a probability
* distribution from the lower half).
*/
public class DoubleSummarySketchToPercentile extends EvalFunc<Double> {
private static final SummaryDeserializer<DoubleSummary> SUMMARY_DESERIALIZER =
new DoubleSummaryDeserializer();
private static final int QUANTILES_SKETCH_SIZE = 1024;
@Override
public Double exec(final Tuple input) throws IOException {
if (input.size() != 2) {
throw new IllegalArgumentException("expected two inputs: sketch and pecentile");
}
final DataByteArray dba = (DataByteArray) input.get(0);
final Sketch<DoubleSummary> sketch = Sketches.heapifySketch(
Memory.wrap(dba.get()), SUMMARY_DESERIALIZER);
final double percentile = (double) input.get(1);
if ((percentile < 0) || (percentile > 100)) {
throw new IllegalArgumentException("percentile must be between 0 and 100");
}
final UpdateDoublesSketch qs = DoublesSketch.builder().setK(QUANTILES_SKETCH_SIZE).build();
final SketchIterator<DoubleSummary> it = sketch.iterator();
while (it.next()) {
qs.update(it.getSummary().getValue());
}
return qs.getQuantile(percentile / 100);
}
}