blob: 963a5baa50b8a44f66a96c0cee27401e520c2311 [file] [log] [blame]
/*******************************************************************************
* Copyright 2015, Yahoo Inc.
* Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
*******************************************************************************/
package com.yahoo.sketches.hive.theta;
import org.apache.hadoop.hive.ql.exec.UDF;
import org.apache.hadoop.io.BytesWritable;
import com.yahoo.memory.Memory;
import com.yahoo.sketches.theta.SetOperation;
import com.yahoo.sketches.theta.Sketch;
import com.yahoo.sketches.theta.Union;
/**
 * Hive UDF that re-samples a serialized theta sketch: the input sketch is fed into a
 * freshly built {@link Union} configured with the requested nominal size and sampling
 * probability, and the union's result is returned in serialized form.
 */
@SuppressWarnings("javadoc")
public class SampleSketchUDF extends UDF {
  /** Nominal entries used for the new sketch when the caller passes a size of zero. */
  public static final int DEFAULT_SIZE = 16384;

  /**
   * Main logic called by Hive; produces a new sketch from the original using the
   * specified size and sampling probability.
   *
   * @param binarySketch
   *          sketch to be sampled, passed in as a BytesWritable. Only the first
   *          {@code getLength()} bytes of its buffer are treated as sketch data.
   * @param sketchSize
   *          Size to use for the new sketch.
   *          This must be a power of 2 and larger than 16. If zero, {@link #DEFAULT_SIZE} is used.
   * @param probability
   *          The sampling probability to use for the new sketch.
   *          Should be greater than zero and less than or equal to 1.0
   * @return The sampled sketch encoded as a BytesWritable, or {@code null} when the input is
   *         {@code null} or its valid payload is 8 bytes or fewer.
   */
  public BytesWritable evaluate(BytesWritable binarySketch, int sketchSize, float probability) {
    // Null checks
    if (binarySketch == null) {
      return null;
    }

    // getBytes() exposes the backing array, whose capacity may exceed the valid payload
    // (e.g. when the writable is reused across rows). Only getLength() bytes are meaningful,
    // so both the size check and the data passed on must be based on that length.
    final int validLength = binarySketch.getLength();
    if (validLength <= 8) {
      return null;
    }

    byte[] serializedSketch = binarySketch.getBytes();
    if (serializedSketch.length != validLength) {
      // Trim away the unused tail so stale bytes never reach the sketch library.
      final byte[] trimmed = new byte[validLength];
      System.arraycopy(serializedSketch, 0, trimmed, 0, validLength);
      serializedSketch = trimmed;
    }

    // Honor the documented contract: a size of zero selects the default.
    final int nominalEntries = (sketchSize == 0) ? DEFAULT_SIZE : sketchSize;

    // The builder will catch errors with improper sketchSize or probability
    final Union union =
        SetOperation.builder().setP(probability).setNominalEntries(nominalEntries).buildUnion();
    union.update(Memory.wrap(serializedSketch)); // Union can accept Memory object directly

    final Sketch intermediateSketch = union.getResult(false, null); // to CompactSketch(unordered, on-heap)
    final byte[] resultSketch = intermediateSketch.toByteArray();

    final BytesWritable result = new BytesWritable();
    result.set(resultSketch, 0, resultSketch.length);
    return result;
  }
}